#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
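
# Note: _SupportsOob returns the node's configured OOB program (the
# "oob_program" ndparam). An empty string means no OOB support, and since an
# empty string is falsy the result can be tested directly in boolean context.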


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
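
  # Illustrative note: per the class docstring above, the processor submits
  # the opcode lists stored in C{jobs} (presumably one job per inner list)
  # and merges the resulting job IDs, together with the extra keyword values
  # kept in C{other}, into the opcode result.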


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable-msg=C0103
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
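
    # Note: the recalculate_locks entry is deleted above once it has been
    # consumed, so a second DeclareLocks call at LEVEL_NODE would trip the
    # assertion at the top of this helper instead of silently recomputing
    # the node locks.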


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.glm.list_owned(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
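
# Illustrative example (hypothetical values): given
#   old_params  = {"vcpus": 2, "memory": 512}
#   update_dict = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
# _GetUpdatedParams returns {"vcpus": 4}: "memory" is removed so it falls
# back to its default, while "vcpus" is overwritten with the new value.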


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf the locks are released
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.glm.list_owned(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
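
# Usage note (illustrative): 'names' and 'keep' are mutually exclusive, as
# the assertion above enforces. Passing keep=[...] releases every lock at
# the given level except the listed ones; passing names=[...] releases
# exactly the listed locks; passing neither releases the whole level.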


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable-msg=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
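
# Example of the resulting environment (illustrative, for a one-NIC,
# one-disk instance): INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT=1,
# INSTANCE_NIC0_MAC, INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE, plus
# INSTANCE_BE_<param> and INSTANCE_HV_<param> entries. The hooks runner later
# prefixes every key with "GANETI_" (see LogicalUnit.BuildHooksEnv).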


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
    'tags': instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  variant = objects.OS.GetVariant(name)
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for LUClusterVerifyConfig.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
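
# Return contract (as implemented above): a (etype, message) pair where etype
# is None when the certificate is fine, or LUClusterVerifyConfig.ETYPE_WARNING
# / ETYPE_ERROR otherwise; the message may be None when there is nothing to
# report.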


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data
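
# Shape of the result (illustrative): each entry is a 3-tuple such as
#   ("cluster", "<hypervisor name>", {<hypervisor defaults>})
#   ("os <os name>", "<hypervisor name>", {<filled os-specific overrides>})
#   ("instance <instance name>", "<hypervisor name>", {<fully filled params>})
# mirroring the "cluster" / "os X" / "instance Y" origins described in the
# docstring.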


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
    if cond:
      self._Error(*args, **kwargs)
    # only ERROR-level problems mark the operation as failed; warnings do not
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
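
  # Message formats produced by _Error (derived from the code above): with
  # the opcode's error_codes option enabled, a machine-parseable
  # "<severity>:<error code>:<item type>:<item>:<message>" string; otherwise
  # a human-readable "<severity>: <item type> <item>: <message>" line. Either
  # way it is emitted through self._feedback_fn, prefixed with "  - ".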
1411

    
1412

    
1413
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1414
  """Verifies the cluster config.
1415

1416
  """
1417
  REQ_BGL = False
1418

    
1419
  def _VerifyHVP(self, hvp_data):
1420
    """Verifies locally the syntax of the hypervisor parameters.
1421

1422
    """
1423
    for item, hv_name, hv_params in hvp_data:
1424
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1425
             (item, hv_name))
1426
      try:
1427
        hv_class = hypervisor.GetHypervisor(hv_name)
1428
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1429
        hv_class.CheckParameterSyntax(hv_params)
1430
      except errors.GenericError, err:
1431
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1432

    
1433
  def ExpandNames(self):
1434
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1435
    self.all_node_info = self.cfg.GetAllNodesInfo()
1436
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1437
    self.needed_locks = {}
1438

    
1439
  def Exec(self, feedback_fn):
1440
    """Verify integrity of cluster, performing various test on nodes.
1441

1442
    """
1443
    self.bad = False
1444
    self._feedback_fn = feedback_fn
1445

    
1446
    feedback_fn("* Verifying cluster config")
1447

    
1448
    for msg in self.cfg.VerifyConfig():
1449
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1450

    
1451
    feedback_fn("* Verifying cluster certificate files")
1452

    
1453
    for cert_filename in constants.ALL_CERT_FILES:
1454
      (errcode, msg) = _VerifyCertificate(cert_filename)
1455
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1456

    
1457
    feedback_fn("* Verifying hypervisor parameters")
1458

    
1459
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1460
                                                self.all_inst_info.values()))
1461

    
1462
    feedback_fn("* Verifying all nodes belong to an existing group")
1463

    
1464
    # We do this verification here because, should this bogus circumstance
1465
    # occur, it would never be catched by VerifyGroup, which only acts on
1466
    # nodes/instances reachable from existing node groups.
1467

    
1468
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1469
                         if node.group not in self.all_group_info)
1470

    
1471
    dangling_instances = {}
1472
    no_node_instances = []
1473

    
1474
    for inst in self.all_inst_info.values():
1475
      if inst.primary_node in dangling_nodes:
1476
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1477
      elif inst.primary_node not in self.all_node_info:
1478
        no_node_instances.append(inst.name)
1479

    
1480
    pretty_dangling = [
1481
        "%s (%s)" %
1482
        (node.name,
1483
         utils.CommaJoin(dangling_instances.get(node.name,
1484
                                                ["no instances"])))
1485
        for node in dangling_nodes]
1486

    
1487
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1488
                  "the following nodes (and their instances) belong to a non"
1489
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1490

    
1491
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1492
                  "the following instances have a non-existing primary-node:"
1493
                  " %s", utils.CommaJoin(no_node_instances))
1494

    
1495
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1496

    
1497

    
1498
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1499
  """Verifies the status of a node group.
1500

1501
  """
1502
  HPATH = "cluster-verify"
1503
  HTYPE = constants.HTYPE_CLUSTER
1504
  REQ_BGL = False
1505

    
1506
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1507

    
1508
  class NodeImage(object):
1509
    """A class representing the logical and physical status of a node.
1510

1511
    @type name: string
1512
    @ivar name: the node name to which this object refers
1513
    @ivar volumes: a structure as returned from
1514
        L{ganeti.backend.GetVolumeList} (runtime)
1515
    @ivar instances: a list of running instances (runtime)
1516
    @ivar pinst: list of configured primary instances (config)
1517
    @ivar sinst: list of configured secondary instances (config)
1518
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1519
        instances for which this node is secondary (config)
1520
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1521
    @ivar dfree: free disk, as reported by the node (runtime)
1522
    @ivar offline: the offline status (config)
1523
    @type rpc_fail: boolean
1524
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1525
        not whether the individual keys were correct) (runtime)
1526
    @type lvm_fail: boolean
1527
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1528
    @type hyp_fail: boolean
1529
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1530
    @type ghost: boolean
1531
    @ivar ghost: whether this is a known node or not (config)
1532
    @type os_fail: boolean
1533
    @ivar os_fail: whether the RPC call didn't return valid OS data
1534
    @type oslist: list
1535
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1536
    @type vm_capable: boolean
1537
    @ivar vm_capable: whether the node can host instances
1538

1539
    """
1540
    def __init__(self, offline=False, name=None, vm_capable=True):
1541
      self.name = name
1542
      self.volumes = {}
1543
      self.instances = []
1544
      self.pinst = []
1545
      self.sinst = []
1546
      self.sbp = {}
1547
      self.mfree = 0
1548
      self.dfree = 0
1549
      self.offline = offline
1550
      self.vm_capable = vm_capable
1551
      self.rpc_fail = False
1552
      self.lvm_fail = False
1553
      self.hyp_fail = False
1554
      self.ghost = False
1555
      self.os_fail = False
1556
      self.oslist = {}
1557
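  # Illustrative sketch, not used by Ganeti itself: how a NodeImage ends up
  # being populated during Exec().  The config phase fills pinst/sinst/sbp,
  # the runtime phase fills volumes/instances/mfree/dfree or sets the *_fail
  # flags instead.  All literal values below are invented for illustration.
  def _ExampleNodeImage(self):
    nimg = self.NodeImage(offline=False, name="node1.example.com",
                          vm_capable=True)
    nimg.pinst = ["instance1.example.com"]         # primaries (from config)
    nimg.sbp = {"node2.example.com": ["instance2.example.com"]}
    nimg.mfree = 2048                              # MiB free, from hypervisor
    nimg.dfree = 102400                            # MiB free in the VG
    return nimg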

    
1558
  def ExpandNames(self):
1559
    # This raises errors.OpPrereqError on its own:
1560
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1561

    
1562
    all_node_info = self.cfg.GetAllNodesInfo()
1563
    all_inst_info = self.cfg.GetAllInstancesInfo()
1564

    
1565
    node_names = set(node.name
1566
                     for node in all_node_info.values()
1567
                     if node.group == self.group_uuid)
1568

    
1569
    inst_names = [inst.name
1570
                  for inst in all_inst_info.values()
1571
                  if inst.primary_node in node_names]
1572

    
1573
    # In Exec(), we warn about mirrored instances that have primary and
1574
    # secondary living in separate node groups. To fully verify that
1575
    # volumes for these instances are healthy, we will need to do an
1576
    # extra call to their secondaries. We ensure here those nodes will
1577
    # be locked.
1578
    for inst in inst_names:
1579
      if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1580
        node_names.update(all_inst_info[inst].secondary_nodes)
1581

    
1582
    self.needed_locks = {
1583
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1584
      locking.LEVEL_NODE: list(node_names),
1585
      locking.LEVEL_INSTANCE: inst_names,
1586
    }
1587

    
1588
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1589

    
1590
  def CheckPrereq(self):
1591
    self.all_node_info = self.cfg.GetAllNodesInfo()
1592
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1593

    
1594
    group_nodes = set(node.name
1595
                      for node in self.all_node_info.values()
1596
                      if node.group == self.group_uuid)
1597

    
1598
    group_instances = set(inst.name
1599
                          for inst in self.all_inst_info.values()
1600
                          if inst.primary_node in group_nodes)
1601

    
1602
    unlocked_nodes = \
1603
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1604

    
1605
    unlocked_instances = \
1606
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1607

    
1608
    if unlocked_nodes:
1609
      raise errors.OpPrereqError("missing lock for nodes: %s" %
1610
                                 utils.CommaJoin(unlocked_nodes))
1611

    
1612
    if unlocked_instances:
1613
      raise errors.OpPrereqError("missing lock for instances: %s" %
1614
                                 utils.CommaJoin(unlocked_instances))
1615

    
1616
    self.my_node_names = utils.NiceSort(group_nodes)
1617
    self.my_inst_names = utils.NiceSort(group_instances)
1618

    
1619
    self.my_node_info = dict((name, self.all_node_info[name])
1620
                             for name in self.my_node_names)
1621

    
1622
    self.my_inst_info = dict((name, self.all_inst_info[name])
1623
                             for name in self.my_inst_names)
1624

    
1625
    # We detect here the nodes that will need the extra RPC calls for verifying
1626
    # split LV volumes; they should be locked.
1627
    extra_lv_nodes = set()
1628

    
1629
    for inst in self.my_inst_info.values():
1630
      if inst.disk_template in constants.DTS_INT_MIRROR:
1631
        group = self.my_node_info[inst.primary_node].group
1632
        for nname in inst.secondary_nodes:
1633
          if self.all_node_info[nname].group != group:
1634
            extra_lv_nodes.add(nname)
1635

    
1636
    unlocked_lv_nodes = \
1637
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1638

    
1639
    if unlocked_lv_nodes:
1640
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1641
                                 utils.CommaJoin(unlocked_lv_nodes))
1642
    self.extra_lv_nodes = list(extra_lv_nodes)
1643

    
1644
  def _VerifyNode(self, ninfo, nresult):
1645
    """Perform some basic validation on data returned from a node.
1646

1647
      - check the result data structure is well formed and has all the
1648
        mandatory fields
1649
      - check ganeti version
1650

1651
    @type ninfo: L{objects.Node}
1652
    @param ninfo: the node to check
1653
    @param nresult: the results from the node
1654
    @rtype: boolean
1655
    @return: whether overall this call was successful (and we can expect
1656
         reasonable values in the response)
1657

1658
    """
1659
    node = ninfo.name
1660
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1661

    
1662
    # main result, nresult should be a non-empty dict
1663
    test = not nresult or not isinstance(nresult, dict)
1664
    _ErrorIf(test, self.ENODERPC, node,
1665
                  "unable to verify node: no data returned")
1666
    if test:
1667
      return False
1668

    
1669
    # compares ganeti version
1670
    local_version = constants.PROTOCOL_VERSION
1671
    remote_version = nresult.get("version", None)
1672
    test = not (remote_version and
1673
                isinstance(remote_version, (list, tuple)) and
1674
                len(remote_version) == 2)
1675
    _ErrorIf(test, self.ENODERPC, node,
1676
             "connection to node returned invalid data")
1677
    if test:
1678
      return False
1679

    
1680
    test = local_version != remote_version[0]
1681
    _ErrorIf(test, self.ENODEVERSION, node,
1682
             "incompatible protocol versions: master %s,"
1683
             " node %s", local_version, remote_version[0])
1684
    if test:
1685
      return False
1686

    
1687
    # node seems compatible, we can actually try to look into its results
1688

    
1689
    # full package version
1690
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1691
                  self.ENODEVERSION, node,
1692
                  "software version mismatch: master %s, node %s",
1693
                  constants.RELEASE_VERSION, remote_version[1],
1694
                  code=self.ETYPE_WARNING)
1695

    
1696
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1697
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1698
      for hv_name, hv_result in hyp_result.iteritems():
1699
        test = hv_result is not None
1700
        _ErrorIf(test, self.ENODEHV, node,
1701
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1702

    
1703
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1704
    if ninfo.vm_capable and isinstance(hvp_result, list):
1705
      for item, hv_name, hv_result in hvp_result:
1706
        _ErrorIf(True, self.ENODEHV, node,
1707
                 "hypervisor %s parameter verify failure (source %s): %s",
1708
                 hv_name, item, hv_result)
1709

    
1710
    test = nresult.get(constants.NV_NODESETUP,
1711
                       ["Missing NODESETUP results"])
1712
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1713
             "; ".join(test))
1714

    
1715
    return True
1716

    
1717
  def _VerifyNodeTime(self, ninfo, nresult,
1718
                      nvinfo_starttime, nvinfo_endtime):
1719
    """Check the node time.
1720

1721
    @type ninfo: L{objects.Node}
1722
    @param ninfo: the node to check
1723
    @param nresult: the remote results for the node
1724
    @param nvinfo_starttime: the start time of the RPC call
1725
    @param nvinfo_endtime: the end time of the RPC call
1726

1727
    """
1728
    node = ninfo.name
1729
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1730

    
1731
    ntime = nresult.get(constants.NV_TIME, None)
1732
    try:
1733
      ntime_merged = utils.MergeTime(ntime)
1734
    except (ValueError, TypeError):
1735
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1736
      return
1737

    
1738
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1739
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1740
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1741
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1742
    else:
1743
      ntime_diff = None
1744

    
1745
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1746
             "Node time diverges by at least %s from master node time",
1747
             ntime_diff)
1748

    
1749
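  # Minimal sketch of the clock-skew window used above (values assumed, not
  # taken from constants): a node time is accepted anywhere inside
  # [rpc_start - max_skew, rpc_end + max_skew], since the node may have
  # sampled its clock at any point during the RPC round-trip.
  @staticmethod
  def _ExampleClockSkew(node_time, rpc_start, rpc_end, max_skew=150.0):
    if node_time < (rpc_start - max_skew):
      return "%.01fs" % abs(rpc_start - node_time)   # node clock is behind
    elif node_time > (rpc_end + max_skew):
      return "%.01fs" % abs(node_time - rpc_end)     # node clock is ahead
    else:
      return None                                    # within tolerance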
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1750
    """Check the node LVM results.
1751

1752
    @type ninfo: L{objects.Node}
1753
    @param ninfo: the node to check
1754
    @param nresult: the remote results for the node
1755
    @param vg_name: the configured VG name
1756

1757
    """
1758
    if vg_name is None:
1759
      return
1760

    
1761
    node = ninfo.name
1762
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1763

    
1764
    # checks vg existence and size > 20G
1765
    vglist = nresult.get(constants.NV_VGLIST, None)
1766
    test = not vglist
1767
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1768
    if not test:
1769
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1770
                                            constants.MIN_VG_SIZE)
1771
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1772

    
1773
    # check pv names
1774
    pvlist = nresult.get(constants.NV_PVLIST, None)
1775
    test = pvlist is None
1776
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1777
    if not test:
1778
      # check that ':' is not present in PV names, since it's a
1779
      # special character for lvcreate (denotes the range of PEs to
1780
      # use on the PV)
1781
      for _, pvname, owner_vg in pvlist:
1782
        test = ":" in pvname
1783
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1784
                 " '%s' of VG '%s'", pvname, owner_vg)
1785

    
1786
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1787
    """Check the node bridges.
1788

1789
    @type ninfo: L{objects.Node}
1790
    @param ninfo: the node to check
1791
    @param nresult: the remote results for the node
1792
    @param bridges: the expected list of bridges
1793

1794
    """
1795
    if not bridges:
1796
      return
1797

    
1798
    node = ninfo.name
1799
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1800

    
1801
    missing = nresult.get(constants.NV_BRIDGES, None)
1802
    test = not isinstance(missing, list)
1803
    _ErrorIf(test, self.ENODENET, node,
1804
             "did not return valid bridge information")
1805
    if not test:
1806
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1807
               utils.CommaJoin(sorted(missing)))
1808

    
1809
  def _VerifyNodeNetwork(self, ninfo, nresult):
1810
    """Check the node network connectivity results.
1811

1812
    @type ninfo: L{objects.Node}
1813
    @param ninfo: the node to check
1814
    @param nresult: the remote results for the node
1815

1816
    """
1817
    node = ninfo.name
1818
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1819

    
1820
    test = constants.NV_NODELIST not in nresult
1821
    _ErrorIf(test, self.ENODESSH, node,
1822
             "node hasn't returned node ssh connectivity data")
1823
    if not test:
1824
      if nresult[constants.NV_NODELIST]:
1825
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1826
          _ErrorIf(True, self.ENODESSH, node,
1827
                   "ssh communication with node '%s': %s", a_node, a_msg)
1828

    
1829
    test = constants.NV_NODENETTEST not in nresult
1830
    _ErrorIf(test, self.ENODENET, node,
1831
             "node hasn't returned node tcp connectivity data")
1832
    if not test:
1833
      if nresult[constants.NV_NODENETTEST]:
1834
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1835
        for anode in nlist:
1836
          _ErrorIf(True, self.ENODENET, node,
1837
                   "tcp communication with node '%s': %s",
1838
                   anode, nresult[constants.NV_NODENETTEST][anode])
1839

    
1840
    test = constants.NV_MASTERIP not in nresult
1841
    _ErrorIf(test, self.ENODENET, node,
1842
             "node hasn't returned node master IP reachability data")
1843
    if not test:
1844
      if not nresult[constants.NV_MASTERIP]:
1845
        if node == self.master_node:
1846
          msg = "the master node cannot reach the master IP (not configured?)"
1847
        else:
1848
          msg = "cannot reach the master IP"
1849
        _ErrorIf(True, self.ENODENET, node, msg)
1850

    
1851
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1852
                      diskstatus):
1853
    """Verify an instance.
1854

1855
    This function checks to see if the required block devices are
1856
    available on the instance's node.
1857

1858
    """
1859
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1860
    node_current = instanceconfig.primary_node
1861

    
1862
    node_vol_should = {}
1863
    instanceconfig.MapLVsByNode(node_vol_should)
1864

    
1865
    for node in node_vol_should:
1866
      n_img = node_image[node]
1867
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1868
        # ignore missing volumes on offline or broken nodes
1869
        continue
1870
      for volume in node_vol_should[node]:
1871
        test = volume not in n_img.volumes
1872
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1873
                 "volume %s missing on node %s", volume, node)
1874

    
1875
    if instanceconfig.admin_up:
1876
      pri_img = node_image[node_current]
1877
      test = instance not in pri_img.instances and not pri_img.offline
1878
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1879
               "instance not running on its primary node %s",
1880
               node_current)
1881

    
1882
    diskdata = [(nname, success, status, idx)
1883
                for (nname, disks) in diskstatus.items()
1884
                for idx, (success, status) in enumerate(disks)]
1885

    
1886
    for nname, success, bdev_status, idx in diskdata:
1887
      # the 'ghost node' construction in Exec() ensures that we have a
1888
      # node here
1889
      snode = node_image[nname]
1890
      bad_snode = snode.ghost or snode.offline
1891
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1892
               self.EINSTANCEFAULTYDISK, instance,
1893
               "couldn't retrieve status for disk/%s on %s: %s",
1894
               idx, nname, bdev_status)
1895
      _ErrorIf((instanceconfig.admin_up and success and
1896
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1897
               self.EINSTANCEFAULTYDISK, instance,
1898
               "disk/%s on %s is faulty", idx, nname)
1899

    
1900
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1901
    """Verify if there are any unknown volumes in the cluster.
1902

1903
    The .os, .swap and backup volumes are ignored. All other volumes are
1904
    reported as unknown.
1905

1906
    @type reserved: L{ganeti.utils.FieldSet}
1907
    @param reserved: a FieldSet of reserved volume names
1908

1909
    """
1910
    for node, n_img in node_image.items():
1911
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1912
        # skip non-healthy nodes
1913
        continue
1914
      for volume in n_img.volumes:
1915
        test = ((node not in node_vol_should or
1916
                volume not in node_vol_should[node]) and
1917
                not reserved.Matches(volume))
1918
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1919
                      "volume %s is unknown", volume)
1920

    
1921
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1922
    """Verify N+1 Memory Resilience.
1923

1924
    Check that if one single node dies we can still start all the
1925
    instances it was primary for.
1926

1927
    """
1928
    cluster_info = self.cfg.GetClusterInfo()
1929
    for node, n_img in node_image.items():
1930
      # This code checks that every node which is now listed as
1931
      # secondary has enough memory to host all instances it is
1932
      # supposed to host, should a single other node in the cluster fail.
1933
      # FIXME: not ready for failover to an arbitrary node
1934
      # FIXME: does not support file-backed instances
1935
      # WARNING: we currently take into account down instances as well
1936
      # as up ones, considering that even if they're down someone
1937
      # might want to start them even in the event of a node failure.
1938
      if n_img.offline:
1939
        # we're skipping offline nodes from the N+1 warning, since
1940
        # most likely we don't have good memory information from them;
1941
        # we already list instances living on such nodes, and that's
1942
        # enough warning
1943
        continue
1944
      for prinode, instances in n_img.sbp.items():
1945
        needed_mem = 0
1946
        for instance in instances:
1947
          bep = cluster_info.FillBE(instance_cfg[instance])
1948
          if bep[constants.BE_AUTO_BALANCE]:
1949
            needed_mem += bep[constants.BE_MEMORY]
1950
        test = n_img.mfree < needed_mem
1951
        self._ErrorIf(test, self.ENODEN1, node,
1952
                      "not enough memory to accomodate instance failovers"
1953
                      " should node %s fail (%dMiB needed, %dMiB available)",
1954
                      prinode, needed_mem, n_img.mfree)
1955

    
1956
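  # Sketch of the N+1 arithmetic above (plain data, not Ganeti's API): a
  # secondary node passes the check only if, for every primary node it backs,
  # the memory of that primary's auto-balanced instances fits into the
  # secondary's free memory on its own.  Example:
  #   sbp = {"node2": ["inst1", "inst2"]}, memory = {"inst1": 512, "inst2": 1024}
  #   mfree = 1024  ->  a failover of node2 would need 1536 MiB -> not N+1 safe
  @staticmethod
  def _ExampleNPlusOneCheck(sbp, instance_memory, mfree):
    failing = []
    for prinode, instances in sbp.items():
      needed_mem = sum(instance_memory[inst] for inst in instances)
      if mfree < needed_mem:
        failing.append((prinode, needed_mem))
    return failing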
  @classmethod
1957
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1958
                   (files_all, files_all_opt, files_mc, files_vm)):
1959
    """Verifies file checksums collected from all nodes.
1960

1961
    @param errorif: Callback for reporting errors
1962
    @param nodeinfo: List of L{objects.Node} objects
1963
    @param master_node: Name of master node
1964
    @param all_nvinfo: RPC results
1965

1966
    """
1967
    node_names = frozenset(node.name for node in nodeinfo)
1968

    
1969
    assert master_node in node_names
1970
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1971
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1972
           "Found file listed in more than one file list"
1973

    
1974
    # Define functions determining which nodes to consider for a file
1975
    file2nodefn = dict([(filename, fn)
1976
      for (files, fn) in [(files_all, None),
1977
                          (files_all_opt, None),
1978
                          (files_mc, lambda node: (node.master_candidate or
1979
                                                   node.name == master_node)),
1980
                          (files_vm, lambda node: node.vm_capable)]
1981
      for filename in files])
1982

    
1983
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1984

    
1985
    for node in nodeinfo:
1986
      nresult = all_nvinfo[node.name]
1987

    
1988
      if nresult.fail_msg or not nresult.payload:
1989
        node_files = None
1990
      else:
1991
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
1992

    
1993
      test = not (node_files and isinstance(node_files, dict))
1994
      errorif(test, cls.ENODEFILECHECK, node.name,
1995
              "Node did not return file checksum data")
1996
      if test:
1997
        continue
1998

    
1999
      for (filename, checksum) in node_files.items():
2000
        # Check if the file should be considered for a node
2001
        fn = file2nodefn[filename]
2002
        if fn is None or fn(node):
2003
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
2004

    
2005
    for (filename, checksums) in fileinfo.items():
2006
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2007

    
2008
      # Nodes having the file
2009
      with_file = frozenset(node_name
2010
                            for nodes in fileinfo[filename].values()
2011
                            for node_name in nodes)
2012

    
2013
      # Nodes missing file
2014
      missing_file = node_names - with_file
2015

    
2016
      if filename in files_all_opt:
2017
        # All or no nodes
2018
        errorif(missing_file and missing_file != node_names,
2019
                cls.ECLUSTERFILECHECK, None,
2020
                "File %s is optional, but it must exist on all or no nodes (not"
2021
                " found on %s)",
2022
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2023
      else:
2024
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2025
                "File %s is missing from node(s) %s", filename,
2026
                utils.CommaJoin(utils.NiceSort(missing_file)))
2027

    
2028
      # See if there are multiple versions of the file
2029
      test = len(checksums) > 1
2030
      if test:
2031
        variants = ["variant %s on %s" %
2032
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2033
                    for (idx, (checksum, nodes)) in
2034
                      enumerate(sorted(checksums.items()))]
2035
      else:
2036
        variants = []
2037

    
2038
      errorif(test, cls.ECLUSTERFILECHECK, None,
2039
              "File %s found with %s different checksums (%s)",
2040
              filename, len(checksums), "; ".join(variants))
2041
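  # Illustrative sketch (plain dicts, not Ganeti objects): the file check
  # above boils down to a {filename: {checksum: set(node names)}} map; more
  # than one checksum key means diverging copies, and any node outside the
  # union of the value sets is missing the file entirely.
  @staticmethod
  def _ExampleFileDivergence(fileinfo, node_names):
    problems = {}
    for filename, checksums in fileinfo.items():
      with_file = set()
      for nodes in checksums.values():
        with_file.update(nodes)
      missing = set(node_names) - with_file
      if missing or len(checksums) > 1:
        problems[filename] = (missing, len(checksums))
    return problems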

    
2042
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2043
                      drbd_map):
2044
    """Verifies and the node DRBD status.
2045

2046
    @type ninfo: L{objects.Node}
2047
    @param ninfo: the node to check
2048
    @param nresult: the remote results for the node
2049
    @param instanceinfo: the dict of instances
2050
    @param drbd_helper: the configured DRBD usermode helper
2051
    @param drbd_map: the DRBD map as returned by
2052
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2053

2054
    """
2055
    node = ninfo.name
2056
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2057

    
2058
    if drbd_helper:
2059
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2060
      test = (helper_result is None)
2061
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2062
               "no drbd usermode helper returned")
2063
      if helper_result:
2064
        status, payload = helper_result
2065
        test = not status
2066
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2067
                 "drbd usermode helper check unsuccessful: %s", payload)
2068
        test = status and (payload != drbd_helper)
2069
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2070
                 "wrong drbd usermode helper: %s", payload)
2071

    
2072
    # compute the DRBD minors
2073
    node_drbd = {}
2074
    for minor, instance in drbd_map[node].items():
2075
      test = instance not in instanceinfo
2076
      _ErrorIf(test, self.ECLUSTERCFG, None,
2077
               "ghost instance '%s' in temporary DRBD map", instance)
2078
        # ghost instance should not be running, but otherwise we
2079
        # don't give double warnings (both ghost instance and
2080
        # unallocated minor in use)
2081
      if test:
2082
        node_drbd[minor] = (instance, False)
2083
      else:
2084
        instance = instanceinfo[instance]
2085
        node_drbd[minor] = (instance.name, instance.admin_up)
2086

    
2087
    # and now check them
2088
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2089
    test = not isinstance(used_minors, (tuple, list))
2090
    _ErrorIf(test, self.ENODEDRBD, node,
2091
             "cannot parse drbd status file: %s", str(used_minors))
2092
    if test:
2093
      # we cannot check drbd status
2094
      return
2095

    
2096
    for minor, (iname, must_exist) in node_drbd.items():
2097
      test = minor not in used_minors and must_exist
2098
      _ErrorIf(test, self.ENODEDRBD, node,
2099
               "drbd minor %d of instance %s is not active", minor, iname)
2100
    for minor in used_minors:
2101
      test = minor not in node_drbd
2102
      _ErrorIf(test, self.ENODEDRBD, node,
2103
               "unallocated drbd minor %d is in use", minor)
2104
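  # Sketch of the DRBD minor cross-check above (invented data, not Ganeti's
  # API): minors the configuration expects for a running instance but that
  # the node does not report are "not active"; minors the node reports but
  # the configuration does not know about are "unallocated but in use".
  @staticmethod
  def _ExampleDrbdMinorCheck(node_drbd, used_minors):
    inactive = [minor for minor, (_, must_exist) in node_drbd.items()
                if must_exist and minor not in used_minors]
    unallocated = [minor for minor in used_minors if minor not in node_drbd]
    return (inactive, unallocated)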

    
2105
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2106
    """Builds the node OS structures.
2107

2108
    @type ninfo: L{objects.Node}
2109
    @param ninfo: the node to check
2110
    @param nresult: the remote results for the node
2111
    @param nimg: the node image object
2112

2113
    """
2114
    node = ninfo.name
2115
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2116

    
2117
    remote_os = nresult.get(constants.NV_OSLIST, None)
2118
    test = (not isinstance(remote_os, list) or
2119
            not compat.all(isinstance(v, list) and len(v) == 7
2120
                           for v in remote_os))
2121

    
2122
    _ErrorIf(test, self.ENODEOS, node,
2123
             "node hasn't returned valid OS data")
2124

    
2125
    nimg.os_fail = test
2126

    
2127
    if test:
2128
      return
2129

    
2130
    os_dict = {}
2131

    
2132
    for (name, os_path, status, diagnose,
2133
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2134

    
2135
      if name not in os_dict:
2136
        os_dict[name] = []
2137

    
2138
      # parameters is a list of lists instead of list of tuples due to
2139
      # JSON lacking a real tuple type, fix it:
2140
      parameters = [tuple(v) for v in parameters]
2141
      os_dict[name].append((os_path, status, diagnose,
2142
                            set(variants), set(parameters), set(api_ver)))
2143

    
2144
    nimg.oslist = os_dict
2145

    
2146
  def _VerifyNodeOS(self, ninfo, nimg, base):
2147
    """Verifies the node OS list.
2148

2149
    @type ninfo: L{objects.Node}
2150
    @param ninfo: the node to check
2151
    @param nimg: the node image object
2152
    @param base: the 'template' node we match against (e.g. from the master)
2153

2154
    """
2155
    node = ninfo.name
2156
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2157

    
2158
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2159

    
2160
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2161
    for os_name, os_data in nimg.oslist.items():
2162
      assert os_data, "Empty OS status for OS %s?!" % os_name
2163
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2164
      _ErrorIf(not f_status, self.ENODEOS, node,
2165
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2166
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2167
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2168
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2169
      # this will be caught in the backend too
2170
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2171
               and not f_var, self.ENODEOS, node,
2172
               "OS %s with API at least %d does not declare any variant",
2173
               os_name, constants.OS_API_V15)
2174
      # comparisons with the 'base' image
2175
      test = os_name not in base.oslist
2176
      _ErrorIf(test, self.ENODEOS, node,
2177
               "Extra OS %s not present on reference node (%s)",
2178
               os_name, base.name)
2179
      if test:
2180
        continue
2181
      assert base.oslist[os_name], "Base node has empty OS status?"
2182
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2183
      if not b_status:
2184
        # base OS is invalid, skipping
2185
        continue
2186
      for kind, a, b in [("API version", f_api, b_api),
2187
                         ("variants list", f_var, b_var),
2188
                         ("parameters", beautify_params(f_param),
2189
                          beautify_params(b_param))]:
2190
        _ErrorIf(a != b, self.ENODEOS, node,
2191
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2192
                 kind, os_name, base.name,
2193
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2194

    
2195
    # check any missing OSes
2196
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2197
    _ErrorIf(missing, self.ENODEOS, node,
2198
             "OSes present on reference node %s but missing on this node: %s",
2199
             base.name, utils.CommaJoin(missing))
2200

    
2201
  def _VerifyOob(self, ninfo, nresult):
2202
    """Verifies out of band functionality of a node.
2203

2204
    @type ninfo: L{objects.Node}
2205
    @param ninfo: the node to check
2206
    @param nresult: the remote results for the node
2207

2208
    """
2209
    node = ninfo.name
2210
    # We just have to verify the paths on master and/or master candidates
2211
    # as the oob helper is invoked on the master
2212
    if ((ninfo.master_candidate or ninfo.master_capable) and
2213
        constants.NV_OOB_PATHS in nresult):
2214
      for path_result in nresult[constants.NV_OOB_PATHS]:
2215
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2216

    
2217
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2218
    """Verifies and updates the node volume data.
2219

2220
    This function will update a L{NodeImage}'s internal structures
2221
    with data from the remote call.
2222

2223
    @type ninfo: L{objects.Node}
2224
    @param ninfo: the node to check
2225
    @param nresult: the remote results for the node
2226
    @param nimg: the node image object
2227
    @param vg_name: the configured VG name
2228

2229
    """
2230
    node = ninfo.name
2231
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2232

    
2233
    nimg.lvm_fail = True
2234
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2235
    if vg_name is None:
2236
      pass
2237
    elif isinstance(lvdata, basestring):
2238
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2239
               utils.SafeEncode(lvdata))
2240
    elif not isinstance(lvdata, dict):
2241
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2242
    else:
2243
      nimg.volumes = lvdata
2244
      nimg.lvm_fail = False
2245

    
2246
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2247
    """Verifies and updates the node instance list.
2248

2249
    If the listing was successful, then updates this node's instance
2250
    list. Otherwise, it marks the RPC call as failed for the instance
2251
    list key.
2252

2253
    @type ninfo: L{objects.Node}
2254
    @param ninfo: the node to check
2255
    @param nresult: the remote results for the node
2256
    @param nimg: the node image object
2257

2258
    """
2259
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2260
    test = not isinstance(idata, list)
2261
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2262
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2263
    if test:
2264
      nimg.hyp_fail = True
2265
    else:
2266
      nimg.instances = idata
2267

    
2268
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2269
    """Verifies and computes a node information map
2270

2271
    @type ninfo: L{objects.Node}
2272
    @param ninfo: the node to check
2273
    @param nresult: the remote results for the node
2274
    @param nimg: the node image object
2275
    @param vg_name: the configured VG name
2276

2277
    """
2278
    node = ninfo.name
2279
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2280

    
2281
    # try to read free memory (from the hypervisor)
2282
    hv_info = nresult.get(constants.NV_HVINFO, None)
2283
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2284
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2285
    if not test:
2286
      try:
2287
        nimg.mfree = int(hv_info["memory_free"])
2288
      except (ValueError, TypeError):
2289
        _ErrorIf(True, self.ENODERPC, node,
2290
                 "node returned invalid nodeinfo, check hypervisor")
2291

    
2292
    # FIXME: devise a free space model for file based instances as well
2293
    if vg_name is not None:
2294
      test = (constants.NV_VGLIST not in nresult or
2295
              vg_name not in nresult[constants.NV_VGLIST])
2296
      _ErrorIf(test, self.ENODELVM, node,
2297
               "node didn't return data for the volume group '%s'"
2298
               " - it is either missing or broken", vg_name)
2299
      if not test:
2300
        try:
2301
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2302
        except (ValueError, TypeError):
2303
          _ErrorIf(True, self.ENODERPC, node,
2304
                   "node returned invalid LVM info, check LVM status")
2305

    
2306
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2307
    """Gets per-disk status information for all instances.
2308

2309
    @type nodelist: list of strings
2310
    @param nodelist: Node names
2311
    @type node_image: dict of (name, L{objects.Node})
2312
    @param node_image: Node objects
2313
    @type instanceinfo: dict of (name, L{objects.Instance})
2314
    @param instanceinfo: Instance objects
2315
    @rtype: {instance: {node: [(success, payload)]}}
2316
    @return: a dictionary of per-instance dictionaries with nodes as
2317
        keys and disk information as values; the disk information is a
2318
        list of tuples (success, payload)
2319

2320
    """
2321
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2322

    
2323
    node_disks = {}
2324
    node_disks_devonly = {}
2325
    diskless_instances = set()
2326
    diskless = constants.DT_DISKLESS
2327

    
2328
    for nname in nodelist:
2329
      node_instances = list(itertools.chain(node_image[nname].pinst,
2330
                                            node_image[nname].sinst))
2331
      diskless_instances.update(inst for inst in node_instances
2332
                                if instanceinfo[inst].disk_template == diskless)
2333
      disks = [(inst, disk)
2334
               for inst in node_instances
2335
               for disk in instanceinfo[inst].disks]
2336

    
2337
      if not disks:
2338
        # No need to collect data
2339
        continue
2340

    
2341
      node_disks[nname] = disks
2342

    
2343
      # Creating copies as SetDiskID below will modify the objects and that can
2344
      # lead to incorrect data returned from nodes
2345
      devonly = [dev.Copy() for (_, dev) in disks]
2346

    
2347
      for dev in devonly:
2348
        self.cfg.SetDiskID(dev, nname)
2349

    
2350
      node_disks_devonly[nname] = devonly
2351

    
2352
    assert len(node_disks) == len(node_disks_devonly)
2353

    
2354
    # Collect data from all nodes with disks
2355
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2356
                                                          node_disks_devonly)
2357

    
2358
    assert len(result) == len(node_disks)
2359

    
2360
    instdisk = {}
2361

    
2362
    for (nname, nres) in result.items():
2363
      disks = node_disks[nname]
2364

    
2365
      if nres.offline:
2366
        # No data from this node
2367
        data = len(disks) * [(False, "node offline")]
2368
      else:
2369
        msg = nres.fail_msg
2370
        _ErrorIf(msg, self.ENODERPC, nname,
2371
                 "while getting disk information: %s", msg)
2372
        if msg:
2373
          # No data from this node
2374
          data = len(disks) * [(False, msg)]
2375
        else:
2376
          data = []
2377
          for idx, i in enumerate(nres.payload):
2378
            if isinstance(i, (tuple, list)) and len(i) == 2:
2379
              data.append(i)
2380
            else:
2381
              logging.warning("Invalid result from node %s, entry %d: %s",
2382
                              nname, idx, i)
2383
              data.append((False, "Invalid result from the remote node"))
2384

    
2385
      for ((inst, _), status) in zip(disks, data):
2386
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2387

    
2388
    # Add empty entries for diskless instances.
2389
    for inst in diskless_instances:
2390
      assert inst not in instdisk
2391
      instdisk[inst] = {}
2392

    
2393
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2394
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2395
                      compat.all(isinstance(s, (tuple, list)) and
2396
                                 len(s) == 2 for s in statuses)
2397
                      for inst, nnames in instdisk.items()
2398
                      for nname, statuses in nnames.items())
2399
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2400

    
2401
    return instdisk
2402
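  # Example of the mapping returned by _CollectDiskInfo above (all values
  # invented for illustration): one (success, payload) tuple per disk, keyed
  # by node name, with diskless instances mapped to an empty dict:
  #   {"instance1.example.com":
  #      {"node1.example.com": [(True, <status of disk/0>),
  #                             (True, <status of disk/1>)],
  #       "node2.example.com": [(False, "node offline"),
  #                             (False, "node offline")]},
  #    "diskless1.example.com": {}}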

    
2403
  def BuildHooksEnv(self):
2404
    """Build hooks env.
2405

2406
    Cluster-Verify hooks are run only in the post phase and their failure makes
2407
    the output be logged in the verify output and the verification fail.
2408

2409
    """
2410
    env = {
2411
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2412
      }
2413

    
2414
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2415
               for node in self.my_node_info.values())
2416

    
2417
    return env
2418

    
2419
  def BuildHooksNodes(self):
2420
    """Build hooks nodes.
2421

2422
    """
2423
    assert self.my_node_names, ("Node list not gathered,"
2424
      " has CheckPrereq been executed?")
2425
    return ([], self.my_node_names)
2426

    
2427
  def Exec(self, feedback_fn):
2428
    """Verify integrity of the node group, performing various test on nodes.
2429

2430
    """
2431
    # This method has too many local variables. pylint: disable-msg=R0914
2432
    self.bad = False
2433
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2434
    verbose = self.op.verbose
2435
    self._feedback_fn = feedback_fn
2436

    
2437
    vg_name = self.cfg.GetVGName()
2438
    drbd_helper = self.cfg.GetDRBDHelper()
2439
    cluster = self.cfg.GetClusterInfo()
2440
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2441
    hypervisors = cluster.enabled_hypervisors
2442
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2443

    
2444
    i_non_redundant = [] # Non redundant instances
2445
    i_non_a_balanced = [] # Non auto-balanced instances
2446
    n_offline = 0 # Count of offline nodes
2447
    n_drained = 0 # Count of nodes being drained
2448
    node_vol_should = {}
2449

    
2450
    # FIXME: verify OS list
2451

    
2452
    # File verification
2453
    filemap = _ComputeAncillaryFiles(cluster, False)
2454

    
2455
    # do local checksums
2456
    master_node = self.master_node = self.cfg.GetMasterNode()
2457
    master_ip = self.cfg.GetMasterIP()
2458

    
2459
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2460

    
2461
    # We will make nodes contact all nodes in their group, and one node from
2462
    # every other group.
2463
    # TODO: should it be a *random* node, different every time?
2464
    online_nodes = [node.name for node in node_data_list if not node.offline]
2465
    other_group_nodes = {}
2466

    
2467
    for name in sorted(self.all_node_info):
2468
      node = self.all_node_info[name]
2469
      if (node.group not in other_group_nodes
2470
          and node.group != self.group_uuid
2471
          and not node.offline):
2472
        other_group_nodes[node.group] = node.name
2473

    
2474
    node_verify_param = {
2475
      constants.NV_FILELIST:
2476
        utils.UniqueSequence(filename
2477
                             for files in filemap
2478
                             for filename in files),
2479
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2480
      constants.NV_HYPERVISOR: hypervisors,
2481
      constants.NV_HVPARAMS:
2482
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2483
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2484
                                 for node in node_data_list
2485
                                 if not node.offline],
2486
      constants.NV_INSTANCELIST: hypervisors,
2487
      constants.NV_VERSION: None,
2488
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2489
      constants.NV_NODESETUP: None,
2490
      constants.NV_TIME: None,
2491
      constants.NV_MASTERIP: (master_node, master_ip),
2492
      constants.NV_OSLIST: None,
2493
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2494
      }
2495

    
2496
    if vg_name is not None:
2497
      node_verify_param[constants.NV_VGLIST] = None
2498
      node_verify_param[constants.NV_LVLIST] = vg_name
2499
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2500
      node_verify_param[constants.NV_DRBDLIST] = None
2501

    
2502
    if drbd_helper:
2503
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2504

    
2505
    # bridge checks
2506
    # FIXME: this needs to be changed per node-group, not cluster-wide
2507
    bridges = set()
2508
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2509
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2510
      bridges.add(default_nicpp[constants.NIC_LINK])
2511
    for instance in self.my_inst_info.values():
2512
      for nic in instance.nics:
2513
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2514
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2515
          bridges.add(full_nic[constants.NIC_LINK])
2516

    
2517
    if bridges:
2518
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2519

    
2520
    # Build our expected cluster state
2521
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2522
                                                 name=node.name,
2523
                                                 vm_capable=node.vm_capable))
2524
                      for node in node_data_list)
2525

    
2526
    # Gather OOB paths
2527
    oob_paths = []
2528
    for node in self.all_node_info.values():
2529
      path = _SupportsOob(self.cfg, node)
2530
      if path and path not in oob_paths:
2531
        oob_paths.append(path)
2532

    
2533
    if oob_paths:
2534
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2535

    
2536
    for instance in self.my_inst_names:
2537
      inst_config = self.my_inst_info[instance]
2538

    
2539
      for nname in inst_config.all_nodes:
2540
        if nname not in node_image:
2541
          gnode = self.NodeImage(name=nname)
2542
          gnode.ghost = (nname not in self.all_node_info)
2543
          node_image[nname] = gnode
2544

    
2545
      inst_config.MapLVsByNode(node_vol_should)
2546

    
2547
      pnode = inst_config.primary_node
2548
      node_image[pnode].pinst.append(instance)
2549

    
2550
      for snode in inst_config.secondary_nodes:
2551
        nimg = node_image[snode]
2552
        nimg.sinst.append(instance)
2553
        if pnode not in nimg.sbp:
2554
          nimg.sbp[pnode] = []
2555
        nimg.sbp[pnode].append(instance)
2556

    
2557
    # At this point, we have the in-memory data structures complete,
2558
    # except for the runtime information, which we'll gather next
2559

    
2560
    # Due to the way our RPC system works, exact response times cannot be
2561
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2562
    # time before and after executing the request, we can at least have a time
2563
    # window.
2564
    nvinfo_starttime = time.time()
2565
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2566
                                           node_verify_param,
2567
                                           self.cfg.GetClusterName())
2568
    if self.extra_lv_nodes and vg_name is not None:
2569
      extra_lv_nvinfo = \
2570
          self.rpc.call_node_verify(self.extra_lv_nodes,
2571
                                    {constants.NV_LVLIST: vg_name},
2572
                                    self.cfg.GetClusterName())
2573
    else:
2574
      extra_lv_nvinfo = {}
2575
    nvinfo_endtime = time.time()
2576

    
2577
    all_drbd_map = self.cfg.ComputeDRBDMap()
2578

    
2579
    feedback_fn("* Gathering disk information (%s nodes)" %
2580
                len(self.my_node_names))
2581
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2582
                                     self.my_inst_info)
2583

    
2584
    feedback_fn("* Verifying configuration file consistency")
2585

    
2586
    # If not all nodes are being checked, we need to make sure the master node
2587
    # and a non-checked vm_capable node are in the list.
2588
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2589
    if absent_nodes:
2590
      vf_nvinfo = all_nvinfo.copy()
2591
      vf_node_info = list(self.my_node_info.values())
2592
      additional_nodes = []
2593
      if master_node not in self.my_node_info:
2594
        additional_nodes.append(master_node)
2595
        vf_node_info.append(self.all_node_info[master_node])
2596
      # Add the first vm_capable node we find which is not included
2597
      for node in absent_nodes:
2598
        nodeinfo = self.all_node_info[node]
2599
        if nodeinfo.vm_capable and not nodeinfo.offline:
2600
          additional_nodes.append(node)
2601
          vf_node_info.append(self.all_node_info[node])
2602
          break
2603
      key = constants.NV_FILELIST
2604
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2605
                                                 {key: node_verify_param[key]},
2606
                                                 self.cfg.GetClusterName()))
2607
    else:
2608
      vf_nvinfo = all_nvinfo
2609
      vf_node_info = self.my_node_info.values()
2610

    
2611
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2612

    
2613
    feedback_fn("* Verifying node status")
2614

    
2615
    refos_img = None
2616

    
2617
    for node_i in node_data_list:
2618
      node = node_i.name
2619
      nimg = node_image[node]
2620

    
2621
      if node_i.offline:
2622
        if verbose:
2623
          feedback_fn("* Skipping offline node %s" % (node,))
2624
        n_offline += 1
2625
        continue
2626

    
2627
      if node == master_node:
2628
        ntype = "master"
2629
      elif node_i.master_candidate:
2630
        ntype = "master candidate"
2631
      elif node_i.drained:
2632
        ntype = "drained"
2633
        n_drained += 1
2634
      else:
2635
        ntype = "regular"
2636
      if verbose:
2637
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2638

    
2639
      msg = all_nvinfo[node].fail_msg
2640
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2641
      if msg:
2642
        nimg.rpc_fail = True
2643
        continue
2644

    
2645
      nresult = all_nvinfo[node].payload
2646

    
2647
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2648
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2649
      self._VerifyNodeNetwork(node_i, nresult)
2650
      self._VerifyOob(node_i, nresult)
2651

    
2652
      if nimg.vm_capable:
2653
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2654
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2655
                             all_drbd_map)
2656

    
2657
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2658
        self._UpdateNodeInstances(node_i, nresult, nimg)
2659
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2660
        self._UpdateNodeOS(node_i, nresult, nimg)
2661

    
2662
        if not nimg.os_fail:
2663
          if refos_img is None:
2664
            refos_img = nimg
2665
          self._VerifyNodeOS(node_i, nimg, refos_img)
2666
        self._VerifyNodeBridges(node_i, nresult, bridges)
2667

    
2668
        # Check whether all running instances are primary for the node. (This
2669
        # can no longer be done from _VerifyInstance below, since some of the
2670
        # wrong instances could be from other node groups.)
2671
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2672

    
2673
        for inst in non_primary_inst:
2674
          test = inst in self.all_inst_info
2675
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2676
                   "instance should not run on node %s", node_i.name)
2677
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2678
                   "node is running unknown instance %s", inst)
2679

    
2680
    for node, result in extra_lv_nvinfo.items():
2681
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2682
                              node_image[node], vg_name)
2683

    
2684
    feedback_fn("* Verifying instance status")
2685
    for instance in self.my_inst_names:
2686
      if verbose:
2687
        feedback_fn("* Verifying instance %s" % instance)
2688
      inst_config = self.my_inst_info[instance]
2689
      self._VerifyInstance(instance, inst_config, node_image,
2690
                           instdisk[instance])
2691
      inst_nodes_offline = []
2692

    
2693
      pnode = inst_config.primary_node
2694
      pnode_img = node_image[pnode]
2695
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2696
               self.ENODERPC, pnode, "instance %s, connection to"
2697
               " primary node failed", instance)
2698

    
2699
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2700
               self.EINSTANCEBADNODE, instance,
2701
               "instance is marked as running and lives on offline node %s",
2702
               inst_config.primary_node)
2703

    
2704
      # If the instance is non-redundant we cannot survive losing its primary
2705
      # node, so we are not N+1 compliant. On the other hand we have no disk
2706
      # templates with more than one secondary so that situation is not well
2707
      # supported either.
2708
      # FIXME: does not support file-backed instances
2709
      if not inst_config.secondary_nodes:
2710
        i_non_redundant.append(instance)
2711

    
2712
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2713
               instance, "instance has multiple secondary nodes: %s",
2714
               utils.CommaJoin(inst_config.secondary_nodes),
2715
               code=self.ETYPE_WARNING)
2716

    
2717
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2718
        pnode = inst_config.primary_node
2719
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2720
        instance_groups = {}
2721

    
2722
        for node in instance_nodes:
2723
          instance_groups.setdefault(self.all_node_info[node].group,
2724
                                     []).append(node)
2725

    
2726
        pretty_list = [
2727
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2728
          # Sort so that we always list the primary node first.
2729
          for group, nodes in sorted(instance_groups.items(),
2730
                                     key=lambda (_, nodes): pnode in nodes,
2731
                                     reverse=True)]
2732

    
2733
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2734
                      instance, "instance has primary and secondary nodes in"
2735
                      " different groups: %s", utils.CommaJoin(pretty_list),
2736
                      code=self.ETYPE_WARNING)
2737

    
2738
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2739
        i_non_a_balanced.append(instance)
2740

    
2741
      for snode in inst_config.secondary_nodes:
2742
        s_img = node_image[snode]
2743
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2744
                 "instance %s, connection to secondary node failed", instance)
2745

    
2746
        if s_img.offline:
2747
          inst_nodes_offline.append(snode)
2748

    
2749
      # warn that the instance lives on offline nodes
2750
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2751
               "instance has offline secondary node(s) %s",
2752
               utils.CommaJoin(inst_nodes_offline))
2753
      # ... or ghost/non-vm_capable nodes
2754
      for node in inst_config.all_nodes:
2755
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2756
                 "instance lives on ghost node %s", node)
2757
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2758
                 instance, "instance lives on non-vm_capable node %s", node)
2759

    
2760
    feedback_fn("* Verifying orphan volumes")
2761
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2762

    
2763
    # We will get spurious "unknown volume" warnings if any node of this group
2764
    # is secondary for an instance whose primary is in another group. To avoid
2765
    # them, we find these instances and add their volumes to node_vol_should.
2766
    for inst in self.all_inst_info.values():
2767
      for secondary in inst.secondary_nodes:
2768
        if (secondary in self.my_node_info
2769
            and inst.name not in self.my_inst_info):
2770
          inst.MapLVsByNode(node_vol_should)
2771
          break
2772

    
2773
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2774

    
2775
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2776
      feedback_fn("* Verifying N+1 Memory redundancy")
2777
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2778

    
2779
    feedback_fn("* Other Notes")
2780
    if i_non_redundant:
2781
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2782
                  % len(i_non_redundant))
2783

    
2784
    if i_non_a_balanced:
2785
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2786
                  % len(i_non_a_balanced))
2787

    
2788
    if n_offline:
2789
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2790

    
2791
    if n_drained:
2792
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2793

    
2794
    return not self.bad
2795

    
2796
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2797
    """Analyze the post-hooks' result
2798

2799
    This method analyses the hook result, handles it, and sends some
2800
    nicely-formatted feedback back to the user.
2801

2802
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2803
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2804
    @param hooks_results: the results of the multi-node hooks rpc call
2805
    @param feedback_fn: function used to send feedback back to the caller
2806
    @param lu_result: previous Exec result
2807
    @return: the new Exec result, based on the previous result
2808
        and hook results
2809

2810
    """
2811
    # We only really run POST phase hooks, and are only interested in
2812
    # their results
2813
    if phase == constants.HOOKS_PHASE_POST:
2814
      # Used to change hooks' output to proper indentation
2815
      feedback_fn("* Hooks Results")
2816
      assert hooks_results, "invalid result from hooks"
2817

    
2818
      for node_name in hooks_results:
2819
        res = hooks_results[node_name]
2820
        msg = res.fail_msg
2821
        test = msg and not res.offline
2822
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2823
                      "Communication failure in hooks execution: %s", msg)
2824
        if res.offline or msg:
2825
          # No need to investigate payload if node is offline or gave an error.
2826
          # override manually lu_result here as _ErrorIf only
2827
          # overrides self.bad
2828
          lu_result = 1
2829
          continue
2830
        for script, hkr, output in res.payload:
2831
          test = hkr == constants.HKR_FAIL
2832
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2833
                        "Script %s failed, output:", script)
2834
          if test:
2835
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2836
            feedback_fn("%s" % output)
2837
            lu_result = 0
2838

    
2839
      return lu_result


class LUClusterVerifyDisks(NoHooksLU):
2843
  """Verifies the cluster disks status.
2844

2845
  """
2846
  REQ_BGL = False
2847

    
2848
  def ExpandNames(self):
2849
    self.needed_locks = {
2850
      locking.LEVEL_NODE: locking.ALL_SET,
2851
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2852
    }
2853
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2854

    
2855
  def Exec(self, feedback_fn):
2856
    """Verify integrity of cluster disks.
2857

2858
    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
2864
    result = res_nodes, res_instances, res_missing = {}, [], {}
2865

    
2866
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2867
    instances = self.cfg.GetAllInstancesInfo().values()
2868

    
2869
    nv_dict = {}
2870
    for inst in instances:
2871
      inst_lvs = {}
2872
      if not inst.admin_up:
2873
        continue
2874
      inst.MapLVsByNode(inst_lvs)
2875
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2876
      for node, vol_list in inst_lvs.iteritems():
2877
        for vol in vol_list:
2878
          nv_dict[(node, vol)] = inst
2879

    
2880
    if not nv_dict:
2881
      return result
2882

    
2883
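    # ask each (vm-capable) node which logical volumes it actually has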
    node_lvs = self.rpc.call_lv_list(nodes, [])
2884
    for node, node_res in node_lvs.items():
2885
      if node_res.offline:
2886
        continue
2887
      msg = node_res.fail_msg
2888
      if msg:
2889
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2890
        res_nodes[node] = msg
2891
        continue
2892

    
2893
      lvs = node_res.payload
2894
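      # pop every LV the node reports; entries left in nv_dict afterwards
      # are volumes that should exist on their node but were not found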
      for lv_name, (_, _, lv_online) in lvs.items():
2895
        inst = nv_dict.pop((node, lv_name), None)
2896
        if (not lv_online and inst is not None
2897
            and inst.name not in res_instances):
2898
          res_instances.append(inst.name)
2899

    
2900
    # any leftover items in nv_dict are missing LVs, let's arrange the
2901
    # data better
2902
    for key, inst in nv_dict.iteritems():
2903
      if inst.name not in res_missing:
2904
        res_missing[inst.name] = []
2905
      res_missing[inst.name].append(key)
2906

    
2907
    return result


class LUClusterRepairDiskSizes(NoHooksLU):
2911
  """Verifies the cluster disks sizes.
2912

2913
  """
2914
  REQ_BGL = False
2915

    
2916
  def ExpandNames(self):
2917
    if self.op.instances:
2918
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
2919
      self.needed_locks = {
2920
        locking.LEVEL_NODE: [],
2921
        locking.LEVEL_INSTANCE: self.wanted_names,
2922
        }
2923
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2924
    else:
2925
      self.wanted_names = None
2926
      self.needed_locks = {
2927
        locking.LEVEL_NODE: locking.ALL_SET,
2928
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2929
        }
2930
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2931

    
2932
  def DeclareLocks(self, level):
2933
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2934
      self._LockInstancesNodes(primary_only=True)
2935

    
2936
  def CheckPrereq(self):
2937
    """Check prerequisites.
2938

2939
    This only checks the optional instance list against the existing names.
2940

2941
    """
2942
    if self.wanted_names is None:
2943
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2944

    
2945
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2946
                             in self.wanted_names]
2947

    
2948
  def _EnsureChildSizes(self, disk):
2949
    """Ensure children of the disk have the needed disk size.
2950

2951
    This is valid mainly for DRBD8 and fixes an issue where the
2952
    children have smaller disk size.
2953

2954
    @param disk: an L{ganeti.objects.Disk} object
2955

2956
    """
2957
    if disk.dev_type == constants.LD_DRBD8:
2958
      assert disk.children, "Empty children for DRBD8?"
2959
      fchild = disk.children[0]
2960
      mismatch = fchild.size < disk.size
2961
      if mismatch:
2962
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2963
                     fchild.size, disk.size)
2964
        fchild.size = disk.size
2965

    
2966
      # and we recurse on this child only, not on the metadev
2967
      return self._EnsureChildSizes(fchild) or mismatch
2968
    else:
2969
      return False
2970

    
2971
  def Exec(self, feedback_fn):
2972
    """Verify the size of cluster disks.
2973

2974
    """
2975
    # TODO: check child disks too
2976
    # TODO: check differences in size between primary/secondary nodes
2977
    per_node_disks = {}
2978
    for instance in self.wanted_instances:
2979
      pnode = instance.primary_node
2980
      if pnode not in per_node_disks:
2981
        per_node_disks[pnode] = []
2982
      for idx, disk in enumerate(instance.disks):
2983
        per_node_disks[pnode].append((instance, idx, disk))
2984

    
2985
    changed = []
2986
    for node, dskl in per_node_disks.items():
2987
      newl = [v[2].Copy() for v in dskl]
2988
      for dsk in newl:
2989
        self.cfg.SetDiskID(dsk, node)
2990
      result = self.rpc.call_blockdev_getsize(node, newl)
2991
      if result.fail_msg:
2992
        self.LogWarning("Failure in blockdev_getsize call to node"
2993
                        " %s, ignoring", node)
2994
        continue
2995
      if len(result.payload) != len(dskl):
2996
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2997
                        " result.payload=%s", node, len(dskl), result.payload)
2998
        self.LogWarning("Invalid result from node %s, ignoring node results",
2999
                        node)
3000
        continue
3001
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3002
        if size is None:
3003
          self.LogWarning("Disk %d of instance %s did not return size"
3004
                          " information, ignoring", idx, instance.name)
3005
          continue
3006
        if not isinstance(size, (int, long)):
3007
          self.LogWarning("Disk %d of instance %s did not return valid"
3008
                          " size information, ignoring", idx, instance.name)
3009
          continue
3010
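        # the payload is in bytes while the configuration stores sizes in MiB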
        size = size >> 20
3011
        if size != disk.size:
3012
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3013
                       " correcting: recorded %d, actual %d", idx,
3014
                       instance.name, disk.size, size)
3015
          disk.size = size
3016
          self.cfg.Update(instance, feedback_fn)
3017
          changed.append((instance.name, idx, size))
3018
        if self._EnsureChildSizes(disk):
3019
          self.cfg.Update(instance, feedback_fn)
3020
          changed.append((instance.name, idx, disk.size))
3021
    return changed
3022

    
3023

    
3024
class LUClusterRename(LogicalUnit):
3025
  """Rename the cluster.
3026

3027
  """
3028
  HPATH = "cluster-rename"
3029
  HTYPE = constants.HTYPE_CLUSTER
3030

    
3031
  def BuildHooksEnv(self):
3032
    """Build hooks env.
3033

3034
    """
3035
    return {
3036
      "OP_TARGET": self.cfg.GetClusterName(),
3037
      "NEW_NAME": self.op.name,
3038
      }
3039

    
3040
  def BuildHooksNodes(self):
3041
    """Build hooks nodes.
3042

3043
    """
3044
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3045

    
3046
  def CheckPrereq(self):
3047
    """Verify that the passed name is a valid one.
3048

3049
    """
3050
    hostname = netutils.GetHostname(name=self.op.name,
3051
                                    family=self.cfg.GetPrimaryIPFamily())
3052

    
3053
    new_name = hostname.name
3054
    self.ip = new_ip = hostname.ip
3055
    old_name = self.cfg.GetClusterName()
3056
    old_ip = self.cfg.GetMasterIP()
3057
    if new_name == old_name and new_ip == old_ip:
3058
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3059
                                 " cluster has changed",
3060
                                 errors.ECODE_INVAL)
3061
    if new_ip != old_ip:
3062
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3063
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3064
                                   " reachable on the network" %
3065
                                   new_ip, errors.ECODE_NOTUNIQUE)
3066

    
3067
    self.op.name = new_name
3068

    
3069
  def Exec(self, feedback_fn):
3070
    """Rename the cluster.
3071

3072
    """
3073
    clustername = self.op.name
3074
    ip = self.ip
3075

    
3076
    # shutdown the master IP
3077
    master = self.cfg.GetMasterNode()
3078
    result = self.rpc.call_node_stop_master(master, False)
3079
    result.Raise("Could not disable the master role")
3080

    
3081
    try:
3082
      cluster = self.cfg.GetClusterInfo()
3083
      cluster.cluster_name = clustername
3084
      cluster.master_ip = ip
3085
      self.cfg.Update(cluster, feedback_fn)
3086

    
3087
      # update the known hosts file
3088
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3089
      node_list = self.cfg.GetOnlineNodeList()
3090
      try:
3091
        node_list.remove(master)
3092
      except ValueError:
3093
        pass
3094
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3095
    finally:
3096
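      # whatever happened above, always try to bring the master role back up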
      result = self.rpc.call_node_start_master(master, False, False)
3097
      msg = result.fail_msg
3098
      if msg:
3099
        self.LogWarning("Could not re-enable the master role on"
3100
                        " the master, please restart manually: %s", msg)
3101

    
3102
    return clustername
3103

    
3104

    
3105
class LUClusterSetParams(LogicalUnit):
3106
  """Change the parameters of the cluster.
3107

3108
  """
3109
  HPATH = "cluster-modify"
3110
  HTYPE = constants.HTYPE_CLUSTER
3111
  REQ_BGL = False
3112

    
3113
  def CheckArguments(self):
3114
    """Check parameters
3115

3116
    """
3117
    if self.op.uid_pool:
3118
      uidpool.CheckUidPool(self.op.uid_pool)
3119

    
3120
    if self.op.add_uids:
3121
      uidpool.CheckUidPool(self.op.add_uids)
3122

    
3123
    if self.op.remove_uids:
3124
      uidpool.CheckUidPool(self.op.remove_uids)
3125

    
3126
  def ExpandNames(self):
3127
    # FIXME: in the future maybe other cluster params won't require checking on
3128
    # all nodes to be modified.
3129
    self.needed_locks = {
3130
      locking.LEVEL_NODE: locking.ALL_SET,
3131
    }
3132
    self.share_locks[locking.LEVEL_NODE] = 1
3133

    
3134
  def BuildHooksEnv(self):
3135
    """Build hooks env.
3136

3137
    """
3138
    return {
3139
      "OP_TARGET": self.cfg.GetClusterName(),
3140
      "NEW_VG_NAME": self.op.vg_name,
3141
      }
3142

    
3143
  def BuildHooksNodes(self):
3144
    """Build hooks nodes.
3145

3146
    """
3147
    mn = self.cfg.GetMasterNode()
3148
    return ([mn], [mn])
3149

    
3150
  def CheckPrereq(self):
3151
    """Check prerequisites.
3152

3153
    This checks whether the given params don't conflict and
3154
    if the given volume group is valid.
3155

3156
    """
3157
    if self.op.vg_name is not None and not self.op.vg_name:
3158
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3159
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3160
                                   " instances exist", errors.ECODE_INVAL)
3161

    
3162
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3163
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3164
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3165
                                   " drbd-based instances exist",
3166
                                   errors.ECODE_INVAL)
3167

    
3168
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3169

    
3170
    # if vg_name not None, checks given volume group on all nodes
3171
    if self.op.vg_name:
3172
      vglist = self.rpc.call_vg_list(node_list)
3173
      for node in node_list:
3174
        msg = vglist[node].fail_msg
3175
        if msg:
3176
          # ignoring down node
3177
          self.LogWarning("Error while gathering data on node %s"
3178
                          " (ignoring node): %s", node, msg)
3179
          continue
3180
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3181
                                              self.op.vg_name,
3182
                                              constants.MIN_VG_SIZE)
3183
        if vgstatus:
3184
          raise errors.OpPrereqError("Error on node '%s': %s" %
3185
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3186

    
3187
    if self.op.drbd_helper:
3188
      # checks given drbd helper on all nodes
3189
      helpers = self.rpc.call_drbd_helper(node_list)
3190
      for node in node_list:
3191
        ninfo = self.cfg.GetNodeInfo(node)
3192
        if ninfo.offline:
3193
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3194
          continue
3195
        msg = helpers[node].fail_msg
3196
        if msg:
3197
          raise errors.OpPrereqError("Error checking drbd helper on node"
3198
                                     " '%s': %s" % (node, msg),
3199
                                     errors.ECODE_ENVIRON)
3200
        node_helper = helpers[node].payload
3201
        if node_helper != self.op.drbd_helper:
3202
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3203
                                     (node, node_helper), errors.ECODE_ENVIRON)
3204

    
3205
    self.cluster = cluster = self.cfg.GetClusterInfo()
3206
    # validate params changes
3207
    if self.op.beparams:
3208
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3209
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3210

    
3211
    if self.op.ndparams:
3212
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3213
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3214

    
3215
      # TODO: we need a more general way to handle resetting
3216
      # cluster-level parameters to default values
3217
      if self.new_ndparams["oob_program"] == "":
3218
        self.new_ndparams["oob_program"] = \
3219
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3220

    
3221
    if self.op.nicparams:
3222
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3223
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3224
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3225
      nic_errors = []
3226

    
3227
      # check all instances for consistency
3228
      for instance in self.cfg.GetAllInstancesInfo().values():
3229
        for nic_idx, nic in enumerate(instance.nics):
3230
          params_copy = copy.deepcopy(nic.nicparams)
3231
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3232

    
3233
          # check parameter syntax
3234
          try:
3235
            objects.NIC.CheckParameterSyntax(params_filled)
3236
          except errors.ConfigurationError, err:
3237
            nic_errors.append("Instance %s, nic/%d: %s" %
3238
                              (instance.name, nic_idx, err))
3239

    
3240
          # if we're moving instances to routed, check that they have an ip
3241
          target_mode = params_filled[constants.NIC_MODE]
3242
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3243
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3244
                              " address" % (instance.name, nic_idx))
3245
      if nic_errors:
3246
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3247
                                   "\n".join(nic_errors))
3248

    
3249
    # hypervisor list/parameters
3250
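    # (applying an empty update to cluster.hvparams just gives us a private copy)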
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3251
    if self.op.hvparams:
3252
      for hv_name, hv_dict in self.op.hvparams.items():
3253
        if hv_name not in self.new_hvparams:
3254
          self.new_hvparams[hv_name] = hv_dict
3255
        else:
3256
          self.new_hvparams[hv_name].update(hv_dict)
3257

    
3258
    # os hypervisor parameters
3259
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3260
    if self.op.os_hvp:
3261
      for os_name, hvs in self.op.os_hvp.items():
3262
        if os_name not in self.new_os_hvp:
3263
          self.new_os_hvp[os_name] = hvs
3264
        else:
3265
          for hv_name, hv_dict in hvs.items():
3266
            if hv_name not in self.new_os_hvp[os_name]:
3267
              self.new_os_hvp[os_name][hv_name] = hv_dict
3268
            else:
3269
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3270

    
3271
    # os parameters
3272
    self.new_osp = objects.FillDict(cluster.osparams, {})
3273
    if self.op.osparams:
3274
      for os_name, osp in self.op.osparams.items():
3275
        if os_name not in self.new_osp:
3276
          self.new_osp[os_name] = {}
3277

    
3278
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3279
                                                  use_none=True)
3280

    
3281
        if not self.new_osp[os_name]:
3282
          # we removed all parameters
3283
          del self.new_osp[os_name]
3284
        else:
3285
          # check the parameter validity (remote check)
3286
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3287
                         os_name, self.new_osp[os_name])
3288

    
3289
    # changes to the hypervisor list
3290
    if self.op.enabled_hypervisors is not None:
3291
      self.hv_list = self.op.enabled_hypervisors
3292
      for hv in self.hv_list:
3293
        # if the hypervisor doesn't already exist in the cluster
3294
        # hvparams, we initialize it to empty, and then (in both
3295
        # cases) we make sure to fill the defaults, as we might not
3296
        # have a complete defaults list if the hypervisor wasn't
3297
        # enabled before
3298
        if hv not in new_hvp:
3299
          new_hvp[hv] = {}
3300
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3301
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3302
    else:
3303
      self.hv_list = cluster.enabled_hypervisors
3304

    
3305
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3306
      # either the enabled list has changed, or the parameters have, validate
3307
      for hv_name, hv_params in self.new_hvparams.items():
3308
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3309
            (self.op.enabled_hypervisors and
3310
             hv_name in self.op.enabled_hypervisors)):
3311
          # either this is a new hypervisor, or its parameters have changed
3312
          hv_class = hypervisor.GetHypervisor(hv_name)
3313
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3314
          hv_class.CheckParameterSyntax(hv_params)
3315
          _CheckHVParams(self, node_list, hv_name, hv_params)
3316

    
3317
    if self.op.os_hvp:
3318
      # no need to check any newly-enabled hypervisors, since the
3319
      # defaults have already been checked in the above code-block
3320
      for os_name, os_hvp in self.new_os_hvp.items():
3321
        for hv_name, hv_params in os_hvp.items():
3322
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3323
          # we need to fill in the new os_hvp on top of the actual hv_p
3324
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3325
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3326
          hv_class = hypervisor.GetHypervisor(hv_name)
3327
          hv_class.CheckParameterSyntax(new_osp)
3328
          _CheckHVParams(self, node_list, hv_name, new_osp)
3329

    
3330
    if self.op.default_iallocator:
3331
      alloc_script = utils.FindFile(self.op.default_iallocator,
3332
                                    constants.IALLOCATOR_SEARCH_PATH,
3333
                                    os.path.isfile)
3334
      if alloc_script is None:
3335
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3336
                                   " specified" % self.op.default_iallocator,
3337
                                   errors.ECODE_INVAL)
3338

    
3339
  def Exec(self, feedback_fn):
3340
    """Change the parameters of the cluster.
3341

3342
    """
3343
    if self.op.vg_name is not None:
3344
      new_volume = self.op.vg_name
3345
      if not new_volume:
3346
        new_volume = None
3347
      if new_volume != self.cfg.GetVGName():
3348
        self.cfg.SetVGName(new_volume)
3349
      else:
3350
        feedback_fn("Cluster LVM configuration already in desired"
3351
                    " state, not changing")
3352
    if self.op.drbd_helper is not None:
3353
      new_helper = self.op.drbd_helper
3354
      if not new_helper:
3355
        new_helper = None
3356
      if new_helper != self.cfg.GetDRBDHelper():
3357
        self.cfg.SetDRBDHelper(new_helper)
3358
      else:
3359
        feedback_fn("Cluster DRBD helper already in desired state,"
3360
                    " not changing")
3361
    if self.op.hvparams:
3362
      self.cluster.hvparams = self.new_hvparams
3363
    if self.op.os_hvp:
3364
      self.cluster.os_hvp = self.new_os_hvp
3365
    if self.op.enabled_hypervisors is not None:
3366
      self.cluster.hvparams = self.new_hvparams
3367
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3368
    if self.op.beparams:
3369
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3370
    if self.op.nicparams:
3371
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3372
    if self.op.osparams:
3373
      self.cluster.osparams = self.new_osp
3374
    if self.op.ndparams:
3375
      self.cluster.ndparams = self.new_ndparams
3376

    
3377
    if self.op.candidate_pool_size is not None:
3378
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3379
      # we need to update the pool size here, otherwise the save will fail
3380
      _AdjustCandidatePool(self, [])
3381

    
3382
    if self.op.maintain_node_health is not None:
3383
      self.cluster.maintain_node_health = self.op.maintain_node_health
3384

    
3385
    if self.op.prealloc_wipe_disks is not None:
3386
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3387

    
3388
    if self.op.add_uids is not None:
3389
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3390

    
3391
    if self.op.remove_uids is not None:
3392
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3393

    
3394
    if self.op.uid_pool is not None:
3395
      self.cluster.uid_pool = self.op.uid_pool
3396

    
3397
    if self.op.default_iallocator is not None:
3398
      self.cluster.default_iallocator = self.op.default_iallocator
3399

    
3400
    if self.op.reserved_lvs is not None:
3401
      self.cluster.reserved_lvs = self.op.reserved_lvs
3402

    
3403
    def helper_os(aname, mods, desc):
3404
      desc += " OS list"
3405
      lst = getattr(self.cluster, aname)
3406
      for key, val in mods:
3407
        if key == constants.DDM_ADD:
3408
          if val in lst:
3409
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3410
          else:
3411
            lst.append(val)
3412
        elif key == constants.DDM_REMOVE:
3413
          if val in lst:
3414
            lst.remove(val)
3415
          else:
3416
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3417
        else:
3418
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3419

    
3420
    if self.op.hidden_os:
3421
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3422

    
3423
    if self.op.blacklisted_os:
3424
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3425

    
3426
    if self.op.master_netdev:
3427
      master = self.cfg.GetMasterNode()
3428
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3429
                  self.cluster.master_netdev)
3430
      result = self.rpc.call_node_stop_master(master, False)
3431
      result.Raise("Could not disable the master ip")
3432
      feedback_fn("Changing master_netdev from %s to %s" %
3433
                  (self.cluster.master_netdev, self.op.master_netdev))
3434
      self.cluster.master_netdev = self.op.master_netdev
3435

    
3436
    self.cfg.Update(self.cluster, feedback_fn)
3437

    
3438
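    # the configuration now carries the new netdev, so bring the master IP up on it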
    if self.op.master_netdev:
3439
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3440
                  self.op.master_netdev)
3441
      result = self.rpc.call_node_start_master(master, False, False)
3442
      if result.fail_msg:
3443
        self.LogWarning("Could not re-enable the master ip on"
3444
                        " the master, please restart manually: %s",
3445
                        result.fail_msg)
3446

    
3447

    
3448
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
3463
  """Compute files external to Ganeti which need to be consistent.
3464

3465
  @type redist: boolean
3466
  @param redist: Whether to include files which need to be redistributed
3467

3468
  """
3469
  # Compute files for all nodes
3470
  files_all = set([
3471
    constants.SSH_KNOWN_HOSTS_FILE,
3472
    constants.CONFD_HMAC_KEY,
3473
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3474
    ])
3475

    
3476
  if not redist:
3477
    files_all.update(constants.ALL_CERT_FILES)
3478
    files_all.update(ssconf.SimpleStore().GetFileList())
3479

    
3480
  if cluster.modify_etc_hosts:
3481
    files_all.add(constants.ETC_HOSTS)
3482

    
3483
  # Files which must either exist on all nodes or on none
3484
  files_all_opt = set([
3485
    constants.RAPI_USERS_FILE,
3486
    ])
3487

    
3488
  # Files which should only be on master candidates
3489
  files_mc = set()
3490
  if not redist:
3491
    files_mc.add(constants.CLUSTER_CONF_FILE)
3492

    
3493
  # Files which should only be on VM-capable nodes
3494
  files_vm = set(filename
3495
    for hv_name in cluster.enabled_hypervisors
3496
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3497

    
3498
  # Filenames must be unique
3499
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3500
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3501
         "Found file listed in more than one file list"
3502

    
3503
  return (files_all, files_all_opt, files_mc, files_vm)
3504

    
3505

    
3506
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3507
  """Distribute additional files which are part of the cluster configuration.
3508

3509
  ConfigWriter takes care of distributing the config and ssconf files, but
3510
  there are more files which should be distributed to all nodes. This function
3511
  makes sure those are copied.
3512

3513
  @param lu: calling logical unit
3514
  @param additional_nodes: list of nodes not in the config to distribute to
3515
  @type additional_vm: boolean
3516
  @param additional_vm: whether the additional nodes are vm-capable or not
3517

3518
  """
3519
  # Gather target nodes
3520
  cluster = lu.cfg.GetClusterInfo()
3521
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3522

    
3523
  online_nodes = lu.cfg.GetOnlineNodeList()
3524
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3525

    
3526
  if additional_nodes is not None:
3527
    online_nodes.extend(additional_nodes)
3528
    if additional_vm:
3529
      vm_nodes.extend(additional_nodes)
3530

    
3531
  # Never distribute to master node
3532
  for nodelist in [online_nodes, vm_nodes]:
3533
    if master_info.name in nodelist:
3534
      nodelist.remove(master_info.name)
3535

    
3536
  # Gather file lists
3537
  (files_all, files_all_opt, files_mc, files_vm) = \
3538
    _ComputeAncillaryFiles(cluster, True)
3539

    
3540
  # Never re-distribute configuration file from here
3541
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3542
              constants.CLUSTER_CONF_FILE in files_vm)
3543
  assert not files_mc, "Master candidates not handled in this function"
3544

    
3545
  filemap = [
3546
    (online_nodes, files_all),
3547
    (online_nodes, files_all_opt),
3548
    (vm_nodes, files_vm),
3549
    ]
3550

    
3551
  # Upload the files
3552
  for (node_list, files) in filemap:
3553
    for fname in files:
3554
      _UploadHelper(lu, node_list, fname)
3555

    
3556

    
3557
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
3580
  """Sleep and poll for an instance's disk to sync.
3581

3582
  """
3583
  if not instance.disks or disks is not None and not disks:
3584
    return True
3585

    
3586
  disks = _ExpandCheckDisks(instance, disks)
3587

    
3588
  if not oneshot:
3589
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3590

    
3591
  node = instance.primary_node
3592

    
3593
  for dev in disks:
3594
    lu.cfg.SetDiskID(dev, node)
3595

    
3596
  # TODO: Convert to utils.Retry
3597

    
3598
  retries = 0
3599
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3600
  while True:
3601
    max_time = 0
3602
    done = True
3603
    cumul_degraded = False
3604
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3605
    msg = rstats.fail_msg
3606
    if msg:
3607
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3608
      retries += 1
3609
      if retries >= 10:
3610
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3611
                                 " aborting." % node)
3612
      time.sleep(6)
3613
      continue
3614
    rstats = rstats.payload
3615
    retries = 0
3616
    for i, mstat in enumerate(rstats):
3617
      if mstat is None:
3618
        lu.LogWarning("Can't compute data for node %s/%s",
3619
                           node, disks[i].iv_name)
3620
        continue
3621

    
3622
      cumul_degraded = (cumul_degraded or
3623
                        (mstat.is_degraded and mstat.sync_percent is None))
3624
      if mstat.sync_percent is not None:
3625
        done = False
3626
        if mstat.estimated_time is not None:
3627
          rem_time = ("%s remaining (estimated)" %
3628
                      utils.FormatSeconds(mstat.estimated_time))
3629
          max_time = mstat.estimated_time
3630
        else:
3631
          rem_time = "no time estimate"
3632
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3633
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3634

    
3635
    # if we're done but degraded, let's do a few small retries, to
3636
    # make sure we see a stable and not transient situation; therefore
3637
    # we force restart of the loop
3638
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3639
      logging.info("Degraded disks found, %d retries left", degr_retries)
3640
      degr_retries -= 1
3641
      time.sleep(1)
3642
      continue
3643

    
3644
    if done or oneshot:
3645
      break
3646

    
3647
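    # poll at least once a minute, or sooner if the estimated time is shorter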
    time.sleep(min(60, max_time))
3648

    
3649
  if done:
3650
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3651
  return not cumul_degraded
3652

    
3653

    
3654
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3655
  """Check that mirrors are not degraded.
3656

3657
  The ldisk parameter, if True, will change the test from the
3658
  is_degraded attribute (which represents overall non-ok status for
3659
  the device(s)) to the ldisk (representing the local storage status).
3660

3661
  """
3662
  lu.cfg.SetDiskID(dev, node)
3663

    
3664
  result = True
3665

    
3666
  if on_primary or dev.AssembleOnSecondary():
3667
    rstats = lu.rpc.call_blockdev_find(node, dev)
3668
    msg = rstats.fail_msg
3669
    if msg:
3670
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3671
      result = False
3672
    elif not rstats.payload:
3673
      lu.LogWarning("Can't find disk on node %s", node)
3674
      result = False
3675
    else:
3676
      if ldisk:
3677
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3678
      else:
3679
        result = result and not rstats.payload.is_degraded
3680

    
3681
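  # a device is only as consistent as all of its children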
  if dev.children:
3682
    for child in dev.children:
3683
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3684

    
3685
  return result
3686

    
3687

    
3688
class LUOobCommand(NoHooksLU):
3689
  """Logical unit for OOB handling.
3690

3691
  """
3692
  REQ_BGL = False
3693
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3694

    
3695
  def ExpandNames(self):
3696
    """Gather locks we need.
3697

3698
    """
3699
    if self.op.node_names:
3700
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3701
      lock_names = self.op.node_names
3702
    else:
3703
      lock_names = locking.ALL_SET
3704

    
3705
    self.needed_locks = {
3706
      locking.LEVEL_NODE: lock_names,
3707
      }
3708

    
3709
  def CheckPrereq(self):
3710
    """Check prerequisites.
3711

3712
    This checks:
3713
     - the node exists in the configuration
3714
     - OOB is supported
3715

3716
    Any errors are signaled by raising errors.OpPrereqError.
3717

3718
    """
3719
    self.nodes = []
3720
    self.master_node = self.cfg.GetMasterNode()
3721

    
3722
    assert self.op.power_delay >= 0.0
3723

    
3724
    if self.op.node_names:
3725
      if (self.op.command in self._SKIP_MASTER and
3726
          self.master_node in self.op.node_names):
3727
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3728
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3729

    
3730
        if master_oob_handler:
3731
          additional_text = ("run '%s %s %s' if you want to operate on the"
3732
                             " master regardless") % (master_oob_handler,
3733
                                                      self.op.command,
3734
                                                      self.master_node)
3735
        else:
3736
          additional_text = "it does not support out-of-band operations"
3737

    
3738
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3739
                                    " allowed for %s; %s") %
3740
                                   (self.master_node, self.op.command,
3741
                                    additional_text), errors.ECODE_INVAL)
3742
    else:
3743
      self.op.node_names = self.cfg.GetNodeList()
3744
      if self.op.command in self._SKIP_MASTER:
3745
        self.op.node_names.remove(self.master_node)
3746

    
3747
    if self.op.command in self._SKIP_MASTER:
3748
      assert self.master_node not in self.op.node_names
3749

    
3750
    for node_name in self.op.node_names:
3751
      node = self.cfg.GetNodeInfo(node_name)
3752

    
3753
      if node is None:
3754
        raise errors.OpPrereqError("Node %s not found" % node_name,
3755
                                   errors.ECODE_NOENT)
3756
      else:
3757
        self.nodes.append(node)
3758

    
3759
      if (not self.op.ignore_status and
3760
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3761
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3762
                                    " not marked offline") % node_name,
3763
                                   errors.ECODE_STATE)
3764

    
3765
  def Exec(self, feedback_fn):
3766
    """Execute OOB and return result if we expect any.
3767

3768
    """
3769
    master_node = self.master_node
3770
    ret = []
3771

    
3772
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3773
                                              key=lambda node: node.name)):
3774
      node_entry = [(constants.RS_NORMAL, node.name)]
3775
      ret.append(node_entry)
3776

    
3777
      oob_program = _SupportsOob(self.cfg, node)
3778

    
3779
      if not oob_program:
3780
        node_entry.append((constants.RS_UNAVAIL, None))
3781
        continue
3782

    
3783
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3784
                   self.op.command, oob_program, node.name)
3785
      result = self.rpc.call_run_oob(master_node, oob_program,
3786
                                     self.op.command, node.name,
3787
                                     self.op.timeout)
3788

    
3789
      if result.fail_msg:
3790
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3791
                        node.name, result.fail_msg)
3792
        node_entry.append((constants.RS_NODATA, None))
3793
      else:
3794
        try:
3795
          self._CheckPayload(result)
3796
        except errors.OpExecError, err:
3797
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3798
                          node.name, err)
3799
          node_entry.append((constants.RS_NODATA, None))
3800
        else:
3801
          if self.op.command == constants.OOB_HEALTH:
3802
            # For health we should log important events
3803
            for item, status in result.payload:
3804
              if status in [constants.OOB_STATUS_WARNING,
3805
                            constants.OOB_STATUS_CRITICAL]:
3806
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3807
                                item, node.name, status)
3808

    
3809
          if self.op.command == constants.OOB_POWER_ON:
3810
            node.powered = True
3811
          elif self.op.command == constants.OOB_POWER_OFF:
3812
            node.powered = False
3813
          elif self.op.command == constants.OOB_POWER_STATUS:
3814
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3815
            if powered != node.powered:
3816
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3817
                               " match actual power state (%s)"), node.powered,
3818
                              node.name, powered)
3819

    
3820
          # For configuration changing commands we should update the node
3821
          if self.op.command in (constants.OOB_POWER_ON,
3822
                                 constants.OOB_POWER_OFF):
3823
            self.cfg.Update(node, feedback_fn)
3824

    
3825
          node_entry.append((constants.RS_NORMAL, result.payload))
3826

    
3827
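          # wait the configured delay between consecutive power-on
          # commands, but not after the last node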
          if (self.op.command == constants.OOB_POWER_ON and
3828
              idx < len(self.nodes) - 1):
3829
            time.sleep(self.op.power_delay)
3830

    
3831
    return ret
3832

    
3833
  def _CheckPayload(self, result):
3834
    """Checks if the payload is valid.
3835

3836
    @param result: RPC result
3837
    @raises errors.OpExecError: If payload is not valid
3838

3839
    """
3840
    errs = []
3841
    if self.op.command == constants.OOB_HEALTH:
3842
      if not isinstance(result.payload, list):
3843
        errs.append("command 'health' is expected to return a list but got %s" %
3844
                    type(result.payload))
3845
      else:
3846
        for item, status in result.payload:
3847
          if status not in constants.OOB_STATUSES:
3848
            errs.append("health item '%s' has invalid status '%s'" %
3849
                        (item, status))
3850

    
3851
    if self.op.command == constants.OOB_POWER_STATUS:
3852
      if not isinstance(result.payload, dict):
3853
        errs.append("power-status is expected to return a dict but got %s" %
3854
                    type(result.payload))
3855

    
3856
    if self.op.command in [
3857
        constants.OOB_POWER_ON,
3858
        constants.OOB_POWER_OFF,
3859
        constants.OOB_POWER_CYCLE,
3860
        ]:
3861
      if result.payload is not None:
3862
        errs.append("%s is expected to not return payload but got '%s'" %
3863
                    (self.op.command, result.payload))
3864

    
3865
    if errs:
3866
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3867
                               utils.CommaJoin(errs))
3868

    
3869
class _OsQuery(_QueryBase):
3870
  FIELDS = query.OS_FIELDS
3871

    
3872
  def ExpandNames(self, lu):
3873
    # Lock all nodes in shared mode
3874
    # Temporary removal of locks, should be reverted later
3875
    # TODO: reintroduce locks when they are lighter-weight
3876
    lu.needed_locks = {}
3877
    #self.share_locks[locking.LEVEL_NODE] = 1
3878
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3879

    
3880
    # The following variables interact with _QueryBase._GetNames
3881
    if self.names:
3882
      self.wanted = self.names
3883
    else:
3884
      self.wanted = locking.ALL_SET
3885

    
3886
    self.do_locking = self.use_locking
3887

    
3888
  def DeclareLocks(self, lu, level):
3889
    pass
3890

    
3891
  @staticmethod
3892
  def _DiagnoseByOS(rlist):
3893
    """Remaps a per-node return list into an a per-os per-node dictionary
3894

3895
    @param rlist: a map with node names as keys and OS objects as values
3896

3897
    @rtype: dict
3898
    @return: a dictionary with osnames as keys and as value another
3899
        map, with nodes as keys and tuples of (path, status, diagnose,
3900
        variants, parameters, api_versions) as values, eg::
3901

3902
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3903
                                     (/srv/..., False, "invalid api")],
3904
                           "node2": [(/srv/..., True, "", [], [])]}
3905
          }
3906

3907
    """
3908
    all_os = {}
3909
    # we build here the list of nodes that didn't fail the RPC (at RPC
3910
    # level), so that nodes with a non-responding node daemon don't
3911
    # make all OSes invalid
3912
    good_nodes = [node_name for node_name in rlist
3913
                  if not rlist[node_name].fail_msg]
3914
    for node_name, nr in rlist.items():
3915
      if nr.fail_msg or not nr.payload:
3916
        continue
3917
      for (name, path, status, diagnose, variants,
3918
           params, api_versions) in nr.payload:
3919
        if name not in all_os:
3920
          # build a list of nodes for this os containing empty lists
3921
          # for each node in node_list
3922
          all_os[name] = {}
3923
          for nname in good_nodes:
3924
            all_os[name][nname] = []
3925
        # convert params from [name, help] to (name, help)
3926
        params = [tuple(v) for v in params]
3927
        all_os[name][node_name].append((path, status, diagnose,
3928
                                        variants, params, api_versions))
3929
    return all_os
3930

    
3931
  def _GetQueryData(self, lu):
3932
    """Computes the list of nodes and their attributes.
3933

3934
    """
3935
    # Locking is not used
3936
    assert not (compat.any(lu.glm.is_owned(level)
3937
                           for level in locking.LEVELS
3938
                           if level != locking.LEVEL_CLUSTER) or
3939
                self.do_locking or self.use_locking)
3940

    
3941
    valid_nodes = [node.name
3942
                   for node in lu.cfg.GetAllNodesInfo().values()
3943
                   if not node.offline and node.vm_capable]
3944
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3945
    cluster = lu.cfg.GetClusterInfo()
3946

    
3947
    data = {}
3948

    
3949
    for (os_name, os_data) in pol.items():
3950
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3951
                          hidden=(os_name in cluster.hidden_os),
3952
                          blacklisted=(os_name in cluster.blacklisted_os))
3953

    
3954
      variants = set()
3955
      parameters = set()
3956
      api_versions = set()
3957

    
3958
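      # the OS counts as valid only if the first declaration found on every
      # node is itself valid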
      for idx, osl in enumerate(os_data.values()):
3959
        info.valid = bool(info.valid and osl and osl[0][1])
3960
        if not info.valid:
3961
          break
3962

    
3963
        (node_variants, node_params, node_api) = osl[0][3:6]
3964
        if idx == 0:
3965
          # First entry
3966
          variants.update(node_variants)
3967
          parameters.update(node_params)
3968
          api_versions.update(node_api)
3969
        else:
3970
          # Filter out inconsistent values
3971
          variants.intersection_update(node_variants)
3972
          parameters.intersection_update(node_params)
3973
          api_versions.intersection_update(node_api)
3974

    
3975
      info.variants = list(variants)
3976
      info.parameters = list(parameters)
3977
      info.api_versions = list(api_versions)
3978

    
3979
      data[os_name] = info
3980

    
3981
    # Prepare data in requested order
3982
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3983
            if name in data]
3984

    
3985

    
3986
class LUOsDiagnose(NoHooksLU):
3987
  """Logical unit for OS diagnose/query.
3988

3989
  """
3990
  REQ_BGL = False
3991

    
3992
  @staticmethod
3993
  def _BuildFilter(fields, names):
3994
    """Builds a filter for querying OSes.
3995

3996
    """
3997
    name_filter = qlang.MakeSimpleFilter("name", names)
3998

    
3999
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4000
    # respective field is not requested
4001
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4002
                     for fname in ["hidden", "blacklisted"]
4003
                     if fname not in fields]
4004
    if "valid" not in fields:
4005
      status_filter.append([qlang.OP_TRUE, "valid"])
4006

    
4007
    if status_filter:
4008
      status_filter.insert(0, qlang.OP_AND)
4009
    else:
4010
      status_filter = None
4011

    
4012
    if name_filter and status_filter:
4013
      return [qlang.OP_AND, name_filter, status_filter]
4014
    elif name_filter:
4015
      return name_filter
4016
    else:
4017
      return status_filter
4018

    
4019
  def CheckArguments(self):
4020
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4021
                       self.op.output_fields, False)
4022

    
4023
  def ExpandNames(self):
4024
    self.oq.ExpandNames(self)
4025

    
4026
  def Exec(self, feedback_fn):
4027
    return self.oq.OldStyleQuery(self)
4028

    
4029

    
4030
class LUNodeRemove(LogicalUnit):
4031
  """Logical unit for removing a node.
4032

4033
  """
4034
  HPATH = "node-remove"
4035
  HTYPE = constants.HTYPE_NODE
4036

    
4037
  def BuildHooksEnv(self):
4038
    """Build hooks env.
4039

4040
    This doesn't run on the target node in the pre phase as a failed
4041
    node would then be impossible to remove.
4042

4043
    """
4044
    return {
4045
      "OP_TARGET": self.op.node_name,
4046
      "NODE_NAME": self.op.node_name,
4047
      }
4048

    
4049
  def BuildHooksNodes(self):
4050
    """Build hooks nodes.
4051

4052
    """
4053
    all_nodes = self.cfg.GetNodeList()
4054
    try:
4055
      all_nodes.remove(self.op.node_name)
4056
    except ValueError:
4057
      logging.warning("Node '%s', which is about to be removed, was not found"
4058
                      " in the list of all nodes", self.op.node_name)
4059
    return (all_nodes, all_nodes)
4060

    
4061
  def CheckPrereq(self):
4062
    """Check prerequisites.
4063

4064
    This checks:
4065
     - the node exists in the configuration
4066
     - it does not have primary or secondary instances
4067
     - it's not the master
4068

4069
    Any errors are signaled by raising errors.OpPrereqError.
4070

4071
    """
4072
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4073
    node = self.cfg.GetNodeInfo(self.op.node_name)
4074
    assert node is not None
4075

    
4076
    instance_list = self.cfg.GetInstanceList()
4077

    
4078
    masternode = self.cfg.GetMasterNode()
4079
    if node.name == masternode:
4080
      raise errors.OpPrereqError("Node is the master node, failover to another"
4081
                                 " node is required", errors.ECODE_INVAL)
4082

    
4083
    for instance_name in instance_list:
4084
      instance = self.cfg.GetInstanceInfo(instance_name)
4085
      if node.name in instance.all_nodes:
4086
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4087
                                   " please remove first" % instance_name,
4088
                                   errors.ECODE_INVAL)
4089
    self.op.node_name = node.name
4090
    self.node = node
4091

    
4092
  def Exec(self, feedback_fn):
4093
    """Removes the node from the cluster.
4094

4095
    """
4096
    node = self.node
4097
    logging.info("Stopping the node daemon and removing configs from node %s",
4098
                 node.name)
4099

    
4100
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4101

    
4102
    # Promote nodes to master candidate as needed
4103
    _AdjustCandidatePool(self, exceptions=[node.name])
4104
    self.context.RemoveNode(node.name)
4105

    
4106
    # Run post hooks on the node before it's removed
4107
    _RunPostHook(self, node.name)
4108

    
4109
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4110
    msg = result.fail_msg
4111
    if msg:
4112
      self.LogWarning("Errors encountered on the remote node while leaving"
4113
                      " the cluster: %s", msg)
4114

    
4115
    # Remove node from our /etc/hosts
4116
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4117
      master_node = self.cfg.GetMasterNode()
4118
      result = self.rpc.call_etc_hosts_modify(master_node,
4119
                                              constants.ETC_HOSTS_REMOVE,
4120
                                              node.name, None)
4121
      result.Raise("Can't update hosts file with new host data")
4122
      _RedistributeAncillaryFiles(self)
4123

    
4124

    
4125
class _NodeQuery(_QueryBase):
4126
  FIELDS = query.NODE_FIELDS
4127

    
4128
  def ExpandNames(self, lu):
4129
    lu.needed_locks = {}
4130
    lu.share_locks[locking.LEVEL_NODE] = 1
4131

    
4132
    if self.names:
4133
      self.wanted = _GetWantedNodes(lu, self.names)
4134
    else:
4135
      self.wanted = locking.ALL_SET
4136

    
4137
    self.do_locking = (self.use_locking and
4138
                       query.NQ_LIVE in self.requested_data)
4139

    
4140
    if self.do_locking:
4141
      # if we don't request only static fields, we need to lock the nodes
4142
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4143

    
4144
  def DeclareLocks(self, lu, level):
4145
    pass
4146

    
4147
  def _GetQueryData(self, lu):
4148
    """Computes the list of nodes and their attributes.
4149

4150
    """
4151
    all_info = lu.cfg.GetAllNodesInfo()
4152

    
4153
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4154

    
4155
    # Gather data as requested
4156
    if query.NQ_LIVE in self.requested_data:
4157
      # filter out non-vm_capable nodes
4158
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4159

    
4160
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4161
                                        lu.cfg.GetHypervisorType())
4162
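      # skip nodes whose RPC failed or which returned no payload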
      live_data = dict((name, nresult.payload)
4163
                       for (name, nresult) in node_data.items()
4164
                       if not nresult.fail_msg and nresult.payload)
4165
    else:
4166
      live_data = None
4167

    
4168
    if query.NQ_INST in self.requested_data:
4169
      node_to_primary = dict([(name, set()) for name in nodenames])
4170
      node_to_secondary = dict([(name, set()) for name in nodenames])
4171

    
4172
      inst_data = lu.cfg.GetAllInstancesInfo()
4173

    
4174
      for inst in inst_data.values():
4175
        if inst.primary_node in node_to_primary:
4176
          node_to_primary[inst.primary_node].add(inst.name)
4177
        for secnode in inst.secondary_nodes:
4178
          if secnode in node_to_secondary:
4179
            node_to_secondary[secnode].add(inst.name)
4180
    else:
4181
      node_to_primary = None
4182
      node_to_secondary = None
4183

    
4184
    if query.NQ_OOB in self.requested_data:
4185
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4186
                         for name, node in all_info.iteritems())
4187
    else:
4188
      oob_support = None
4189

    
4190
    if query.NQ_GROUP in self.requested_data:
4191
      groups = lu.cfg.GetAllNodeGroupsInfo()
4192
    else:
4193
      groups = {}
4194

    
4195
    return query.NodeQueryData([all_info[name] for name in nodenames],
4196
                               live_data, lu.cfg.GetMasterNode(),
4197
                               node_to_primary, node_to_secondary, groups,
4198
                               oob_support, lu.cfg.GetClusterInfo())
4199

    
4200

    
4201
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
4220
  """Logical unit for getting volumes on node(s).
4221

4222
  """
4223
  REQ_BGL = False
4224
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4225
  _FIELDS_STATIC = utils.FieldSet("node")
4226

    
4227
  def CheckArguments(self):
4228
    _CheckOutputFields(static=self._FIELDS_STATIC,
4229
                       dynamic=self._FIELDS_DYNAMIC,
4230
                       selected=self.op.output_fields)
4231

    
4232
  def ExpandNames(self):
4233
    self.needed_locks = {}
4234
    self.share_locks[locking.LEVEL_NODE] = 1
4235
    if not self.op.nodes:
4236
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4237
    else:
4238
      self.needed_locks[locking.LEVEL_NODE] = \
4239
        _GetWantedNodes(self, self.op.nodes)
4240

    
4241
  def Exec(self, feedback_fn):
4242
    """Computes the list of nodes and their attributes.
4243

4244
    """
4245
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4246
    volumes = self.rpc.call_node_volumes(nodenames)
4247

    
4248
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
4249
             in self.cfg.GetInstanceList()]
4250

    
4251
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
4252

    
4253
    output = []
4254
    for node in nodenames:
4255
      nresult = volumes[node]
4256
      if nresult.offline:
4257
        continue
4258
      msg = nresult.fail_msg
4259
      if msg:
4260
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4261
        continue
4262

    
4263
      node_vols = nresult.payload[:]
4264
      node_vols.sort(key=lambda vol: vol['dev'])
4265

    
4266
      for vol in node_vols:
4267
        node_output = []
4268
        for field in self.op.output_fields:
4269
          if field == "node":
4270
            val = node
4271
          elif field == "phys":
4272
            val = vol['dev']
4273
          elif field == "vg":
4274
            val = vol['vg']
4275
          elif field == "name":
4276
            val = vol['name']
4277
          elif field == "size":
4278
            val = int(float(vol['size']))
4279
          elif field == "instance":
4280
            for inst in ilist:
4281
              if node not in lv_by_node[inst]:
4282
                continue
4283
              if vol['name'] in lv_by_node[inst][node]:
4284
                val = inst.name
4285
                break
4286
            else:
4287
              val = '-'
4288
          else:
4289
            raise errors.ParameterError(field)
4290
          node_output.append(str(val))
4291

    
4292
        output.append(node_output)
4293

    
4294
    return output
4295

    
4296

    
4297
class LUNodeQueryStorage(NoHooksLU):
4298
  """Logical unit for getting information on storage units on node(s).
4299

4300
  """
4301
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4302
  REQ_BGL = False
4303

    
4304
  def CheckArguments(self):
4305
    _CheckOutputFields(static=self._FIELDS_STATIC,
4306
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4307
                       selected=self.op.output_fields)
4308

    
4309
  def ExpandNames(self):
4310
    self.needed_locks = {}
4311
    self.share_locks[locking.LEVEL_NODE] = 1
4312

    
4313
    if self.op.nodes:
4314
      self.needed_locks[locking.LEVEL_NODE] = \
4315
        _GetWantedNodes(self, self.op.nodes)
4316
    else:
4317
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4318

    
4319
  def Exec(self, feedback_fn):
4320
    """Computes the list of nodes and their attributes.
4321

4322
    """
4323
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4324

    
4325
    # Always get name to sort by
4326
    if constants.SF_NAME in self.op.output_fields:
4327
      fields = self.op.output_fields[:]
4328
    else:
4329
      fields = [constants.SF_NAME] + self.op.output_fields
4330

    
4331
    # Never ask for node or type as it's only known to the LU
4332
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4333
      while extra in fields:
4334
        fields.remove(extra)
4335

    
4336
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4337
    name_idx = field_idx[constants.SF_NAME]
4338

    
4339
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4340
    data = self.rpc.call_storage_list(self.nodes,
4341
                                      self.op.storage_type, st_args,
4342
                                      self.op.name, fields)
4343

    
4344
    result = []
4345

    
4346
    for node in utils.NiceSort(self.nodes):
4347
      nresult = data[node]
4348
      if nresult.offline:
4349
        continue
4350

    
4351
      msg = nresult.fail_msg
4352
      if msg:
4353
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4354
        continue
4355

    
4356
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4357

    
4358
      for name in utils.NiceSort(rows.keys()):
4359
        row = rows[name]
4360

    
4361
        out = []
4362

    
4363
        for field in self.op.output_fields:
4364
          if field == constants.SF_NODE:
4365
            val = node
4366
          elif field == constants.SF_TYPE:
4367
            val = self.op.storage_type
4368
          elif field in field_idx:
4369
            val = row[field_idx[field]]
4370
          else:
4371
            raise errors.ParameterError(field)
4372

    
4373
          out.append(val)
4374

    
4375
        result.append(out)
4376

    
4377
    return result
4378

    
4379

    
4380
class _InstanceQuery(_QueryBase):
4381
  FIELDS = query.INSTANCE_FIELDS
4382

    
4383
  def ExpandNames(self, lu):
4384
    lu.needed_locks = {}
4385
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4386
    lu.share_locks[locking.LEVEL_NODE] = 1
4387

    
4388
    if self.names:
4389
      self.wanted = _GetWantedInstances(lu, self.names)
4390
    else:
4391
      self.wanted = locking.ALL_SET
4392

    
4393
    self.do_locking = (self.use_locking and
4394
                       query.IQ_LIVE in self.requested_data)
4395
    if self.do_locking:
4396
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4397
      lu.needed_locks[locking.LEVEL_NODE] = []
4398
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4399

    
4400
  def DeclareLocks(self, lu, level):
4401
    if level == locking.LEVEL_NODE and self.do_locking:
4402
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4403

    
4404
  def _GetQueryData(self, lu):
4405
    """Computes the list of instances and their attributes.
4406

4407
    """
4408
    cluster = lu.cfg.GetClusterInfo()
4409
    all_info = lu.cfg.GetAllInstancesInfo()
4410

    
4411
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4412

    
4413
    instance_list = [all_info[name] for name in instance_names]
4414
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4415
                                        for inst in instance_list)))
4416
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4417
    bad_nodes = []
4418
    offline_nodes = []
4419
    wrongnode_inst = set()
4420

    
4421
    # Gather data as requested
4422
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4423
      live_data = {}
4424
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4425
      for name in nodes:
4426
        result = node_data[name]
4427
        if result.offline:
4428
          # offline nodes will be in both lists
4429
          assert result.fail_msg
4430
          offline_nodes.append(name)
4431
        if result.fail_msg:
4432
          bad_nodes.append(name)
4433
        elif result.payload:
4434
          for inst in result.payload:
4435
            if inst in all_info:
4436
              if all_info[inst].primary_node == name:
4437
                live_data.update(result.payload)
4438
              else:
4439
                wrongnode_inst.add(inst)
4440
            else:
4441
              # orphan instance; we don't list it here as we don't
4442
              # handle this case yet in the output of instance listing
4443
              logging.warning("Orphan instance '%s' found on node %s",
4444
                              inst, name)
4445
        # else no instance is alive
4446
    else:
4447
      live_data = {}
4448

    
4449
    if query.IQ_DISKUSAGE in self.requested_data:
4450
      disk_usage = dict((inst.name,
4451
                         _ComputeDiskSize(inst.disk_template,
4452
                                          [{constants.IDISK_SIZE: disk.size}
4453
                                           for disk in inst.disks]))
4454
                        for inst in instance_list)
4455
    else:
4456
      disk_usage = None
4457

    
4458
    if query.IQ_CONSOLE in self.requested_data:
4459
      consinfo = {}
4460
      for inst in instance_list:
4461
        if inst.name in live_data:
4462
          # Instance is running
4463
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4464
        else:
4465
          consinfo[inst.name] = None
4466
      assert set(consinfo.keys()) == set(instance_names)
4467
    else:
4468
      consinfo = None
4469

    
4470
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4471
                                   disk_usage, offline_nodes, bad_nodes,
4472
                                   live_data, wrongnode_inst, consinfo)
4473

    
4474

    
4475
class LUQuery(NoHooksLU):
4476
  """Query for resources/items of a certain kind.
4477

4478
  """
4479
  # pylint: disable-msg=W0142
4480
  REQ_BGL = False
4481

    
4482
  def CheckArguments(self):
4483
    qcls = _GetQueryImplementation(self.op.what)
4484

    
4485
    self.impl = qcls(self.op.filter, self.op.fields, False)
4486

    
4487
  def ExpandNames(self):
4488
    self.impl.ExpandNames(self)
4489

    
4490
  def DeclareLocks(self, level):
4491
    self.impl.DeclareLocks(self, level)
4492

    
4493
  def Exec(self, feedback_fn):
4494
    return self.impl.NewStyleQuery(self)
4495

    
4496

    
4497
class LUQueryFields(NoHooksLU):
4498
  """Query for resources/items of a certain kind.
4499

4500
  """
4501
  # pylint: disable-msg=W0142
4502
  REQ_BGL = False
4503

    
4504
  def CheckArguments(self):
4505
    self.qcls = _GetQueryImplementation(self.op.what)
4506

    
4507
  def ExpandNames(self):
4508
    self.needed_locks = {}
4509

    
4510
  def Exec(self, feedback_fn):
4511
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4512

    
4513

    
4514
class LUNodeModifyStorage(NoHooksLU):
4515
  """Logical unit for modifying a storage volume on a node.
4516

4517
  """
4518
  REQ_BGL = False
4519

    
4520
  def CheckArguments(self):
4521
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4522

    
4523
    storage_type = self.op.storage_type
4524

    
4525
    try:
4526
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4527
    except KeyError:
4528
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4529
                                 " modified" % storage_type,
4530
                                 errors.ECODE_INVAL)
4531

    
4532
    diff = set(self.op.changes.keys()) - modifiable
4533
    if diff:
4534
      raise errors.OpPrereqError("The following fields can not be modified for"
4535
                                 " storage units of type '%s': %r" %
4536
                                 (storage_type, list(diff)),
4537
                                 errors.ECODE_INVAL)
4538

    
4539
  def ExpandNames(self):
4540
    self.needed_locks = {
4541
      locking.LEVEL_NODE: self.op.node_name,
4542
      }
4543

    
4544
  def Exec(self, feedback_fn):
4545
    """Computes the list of nodes and their attributes.
4546

4547
    """
4548
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4549
    result = self.rpc.call_storage_modify(self.op.node_name,
4550
                                          self.op.storage_type, st_args,
4551
                                          self.op.name, self.op.changes)
4552
    result.Raise("Failed to modify storage unit '%s' on %s" %
4553
                 (self.op.name, self.op.node_name))
4554

    
4555

    
4556
class LUNodeAdd(LogicalUnit):
4557
  """Logical unit for adding node to the cluster.
4558

4559
  """
4560
  HPATH = "node-add"
4561
  HTYPE = constants.HTYPE_NODE
4562
  _NFLAGS = ["master_capable", "vm_capable"]
4563

    
4564
  def CheckArguments(self):
4565
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4566
    # validate/normalize the node name
4567
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4568
                                         family=self.primary_ip_family)
4569
    self.op.node_name = self.hostname.name
4570

    
4571
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4572
      raise errors.OpPrereqError("Cannot readd the master node",
4573
                                 errors.ECODE_STATE)
4574

    
4575
    if self.op.readd and self.op.group:
4576
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4577
                                 " being readded", errors.ECODE_INVAL)
4578

    
4579
  def BuildHooksEnv(self):
4580
    """Build hooks env.
4581

4582
    This will run on all nodes before, and on all nodes + the new node after.
4583

4584
    """
4585
    return {
4586
      "OP_TARGET": self.op.node_name,
4587
      "NODE_NAME": self.op.node_name,
4588
      "NODE_PIP": self.op.primary_ip,
4589
      "NODE_SIP": self.op.secondary_ip,
4590
      "MASTER_CAPABLE": str(self.op.master_capable),
4591
      "VM_CAPABLE": str(self.op.vm_capable),
4592
      }
4593

    
4594
  def BuildHooksNodes(self):
4595
    """Build hooks nodes.
4596

4597
    """
4598
    # Exclude added node
4599
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4600
    post_nodes = pre_nodes + [self.op.node_name, ]
4601

    
4602
    return (pre_nodes, post_nodes)
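
  # Illustrative sketch (not part of the original code): when adding "node4"
  # to a three-node cluster, pre_nodes contains only the three existing
  # nodes, while post_nodes also includes "node4", so the post-add hook runs
  # on the newly added node as well (hypothetical node name).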
4603

    
4604
  def CheckPrereq(self):
4605
    """Check prerequisites.
4606

4607
    This checks:
4608
     - the new node is not already in the config
4609
     - it is resolvable
4610
     - its parameters (single/dual homed) match the cluster
4611

4612
    Any errors are signaled by raising errors.OpPrereqError.
4613

4614
    """
4615
    cfg = self.cfg
4616
    hostname = self.hostname
4617
    node = hostname.name
4618
    primary_ip = self.op.primary_ip = hostname.ip
4619
    if self.op.secondary_ip is None:
4620
      if self.primary_ip_family == netutils.IP6Address.family:
4621
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4622
                                   " IPv4 address must be given as secondary",
4623
                                   errors.ECODE_INVAL)
4624
      self.op.secondary_ip = primary_ip
4625

    
4626
    secondary_ip = self.op.secondary_ip
4627
    if not netutils.IP4Address.IsValid(secondary_ip):
4628
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4629
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4630

    
4631
    node_list = cfg.GetNodeList()
4632
    if not self.op.readd and node in node_list:
4633
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4634
                                 node, errors.ECODE_EXISTS)
4635
    elif self.op.readd and node not in node_list:
4636
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4637
                                 errors.ECODE_NOENT)
4638

    
4639
    self.changed_primary_ip = False
4640

    
4641
    for existing_node_name in node_list:
4642
      existing_node = cfg.GetNodeInfo(existing_node_name)
4643

    
4644
      if self.op.readd and node == existing_node_name:
4645
        if existing_node.secondary_ip != secondary_ip:
4646
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4647
                                     " address configuration as before",
4648
                                     errors.ECODE_INVAL)
4649
        if existing_node.primary_ip != primary_ip:
4650
          self.changed_primary_ip = True
4651

    
4652
        continue
4653

    
4654
      if (existing_node.primary_ip == primary_ip or
4655
          existing_node.secondary_ip == primary_ip or
4656
          existing_node.primary_ip == secondary_ip or
4657
          existing_node.secondary_ip == secondary_ip):
4658
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4659
                                   " existing node %s" % existing_node.name,
4660
                                   errors.ECODE_NOTUNIQUE)
4661

    
4662
    # After this 'if' block, None is no longer a valid value for the
4663
    # _capable op attributes
4664
    if self.op.readd:
4665
      old_node = self.cfg.GetNodeInfo(node)
4666
      assert old_node is not None, "Can't retrieve locked node %s" % node
4667
      for attr in self._NFLAGS:
4668
        if getattr(self.op, attr) is None:
4669
          setattr(self.op, attr, getattr(old_node, attr))
4670
    else:
4671
      for attr in self._NFLAGS:
4672
        if getattr(self.op, attr) is None:
4673
          setattr(self.op, attr, True)
4674

    
4675
    if self.op.readd and not self.op.vm_capable:
4676
      pri, sec = cfg.GetNodeInstances(node)
4677
      if pri or sec:
4678
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4679
                                   " flag set to false, but it already holds"
4680
                                   " instances" % node,
4681
                                   errors.ECODE_STATE)
4682

    
4683
    # check that the type of the node (single versus dual homed) is the
4684
    # same as for the master
4685
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4686
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4687
    newbie_singlehomed = secondary_ip == primary_ip
4688
    if master_singlehomed != newbie_singlehomed:
4689
      if master_singlehomed:
4690
        raise errors.OpPrereqError("The master has no secondary ip but the"
4691
                                   " new node has one",
4692
                                   errors.ECODE_INVAL)
4693
      else:
4694
        raise errors.OpPrereqError("The master has a secondary ip but the"
4695
                                   " new node doesn't have one",
4696
                                   errors.ECODE_INVAL)
4697

    
4698
    # checks reachability
4699
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4700
      raise errors.OpPrereqError("Node not reachable by ping",
4701
                                 errors.ECODE_ENVIRON)
4702

    
4703
    if not newbie_singlehomed:
4704
      # check reachability from my secondary ip to newbie's secondary ip
4705
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4706
                           source=myself.secondary_ip):
4707
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4708
                                   " based ping to node daemon port",
4709
                                   errors.ECODE_ENVIRON)
4710

    
4711
    if self.op.readd:
4712
      exceptions = [node]
4713
    else:
4714
      exceptions = []
4715

    
4716
    if self.op.master_capable:
4717
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4718
    else:
4719
      self.master_candidate = False
4720

    
4721
    if self.op.readd:
4722
      self.new_node = old_node
4723
    else:
4724
      node_group = cfg.LookupNodeGroup(self.op.group)
4725
      self.new_node = objects.Node(name=node,
4726
                                   primary_ip=primary_ip,
4727
                                   secondary_ip=secondary_ip,
4728
                                   master_candidate=self.master_candidate,
4729
                                   offline=False, drained=False,
4730
                                   group=node_group)
4731

    
4732
    if self.op.ndparams:
4733
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4734

    
4735
  def Exec(self, feedback_fn):
4736
    """Adds the new node to the cluster.
4737

4738
    """
4739
    new_node = self.new_node
4740
    node = new_node.name
4741

    
4742
    # We are adding a new node, so we assume it is powered
4743
    new_node.powered = True
4744

    
4745
    # for re-adds, reset the offline/drained/master-candidate flags;
4746
    # we need to reset here, otherwise offline would prevent RPC calls
4747
    # later in the procedure; this also means that if the re-add
4748
    # fails, we are left with a non-offlined, broken node
4749
    if self.op.readd:
4750
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4751
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4752
      # if we demote the node, we do cleanup later in the procedure
4753
      new_node.master_candidate = self.master_candidate
4754
      if self.changed_primary_ip:
4755
        new_node.primary_ip = self.op.primary_ip
4756

    
4757
    # copy the master/vm_capable flags
4758
    for attr in self._NFLAGS:
4759
      setattr(new_node, attr, getattr(self.op, attr))
4760

    
4761
    # notify the user about any possible mc promotion
4762
    if new_node.master_candidate:
4763
      self.LogInfo("Node will be a master candidate")
4764

    
4765
    if self.op.ndparams:
4766
      new_node.ndparams = self.op.ndparams
4767
    else:
4768
      new_node.ndparams = {}
4769

    
4770
    # check connectivity
4771
    result = self.rpc.call_version([node])[node]
4772
    result.Raise("Can't get version information from node %s" % node)
4773
    if constants.PROTOCOL_VERSION == result.payload:
4774
      logging.info("Communication to node %s fine, sw version %s match",
4775
                   node, result.payload)
4776
    else:
4777
      raise errors.OpExecError("Version mismatch master version %s,"
4778
                               " node version %s" %
4779
                               (constants.PROTOCOL_VERSION, result.payload))
4780

    
4781
    # Add node to our /etc/hosts, and add key to known_hosts
4782
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4783
      master_node = self.cfg.GetMasterNode()
4784
      result = self.rpc.call_etc_hosts_modify(master_node,
4785
                                              constants.ETC_HOSTS_ADD,
4786
                                              self.hostname.name,
4787
                                              self.hostname.ip)
4788
      result.Raise("Can't update hosts file with new host data")
4789

    
4790
    if new_node.secondary_ip != new_node.primary_ip:
4791
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4792
                               False)
4793

    
4794
    node_verify_list = [self.cfg.GetMasterNode()]
4795
    node_verify_param = {
4796
      constants.NV_NODELIST: [node],
4797
      # TODO: do a node-net-test as well?
4798
    }
4799

    
4800
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4801
                                       self.cfg.GetClusterName())
4802
    for verifier in node_verify_list:
4803
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4804
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4805
      if nl_payload:
4806
        for failed in nl_payload:
4807
          feedback_fn("ssh/hostname verification failed"
4808
                      " (checking from %s): %s" %
4809
                      (verifier, nl_payload[failed]))
4810
        raise errors.OpExecError("ssh/hostname verification failed")
4811

    
4812
    if self.op.readd:
4813
      _RedistributeAncillaryFiles(self)
4814
      self.context.ReaddNode(new_node)
4815
      # make sure we redistribute the config
4816
      self.cfg.Update(new_node, feedback_fn)
4817
      # and make sure the new node will not have old files around
4818
      if not new_node.master_candidate:
4819
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4820
        msg = result.fail_msg
4821
        if msg:
4822
          self.LogWarning("Node failed to demote itself from master"
4823
                          " candidate status: %s" % msg)
4824
    else:
4825
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4826
                                  additional_vm=self.op.vm_capable)
4827
      self.context.AddNode(new_node, self.proc.GetECId())
4828

    
4829

    
4830
class LUNodeSetParams(LogicalUnit):
4831
  """Modifies the parameters of a node.
4832

4833
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4834
      to the node role (as _ROLE_*)
4835
  @cvar _R2F: a dictionary from node role to tuples of flags
4836
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4837

4838
  """
4839
  HPATH = "node-modify"
4840
  HTYPE = constants.HTYPE_NODE
4841
  REQ_BGL = False
4842
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4843
  _F2R = {
4844
    (True, False, False): _ROLE_CANDIDATE,
4845
    (False, True, False): _ROLE_DRAINED,
4846
    (False, False, True): _ROLE_OFFLINE,
4847
    (False, False, False): _ROLE_REGULAR,
4848
    }
4849
  _R2F = dict((v, k) for k, v in _F2R.items())
4850
  _FLAGS = ["master_candidate", "drained", "offline"]
4851

    
4852
  def CheckArguments(self):
4853
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4854
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4855
                self.op.master_capable, self.op.vm_capable,
4856
                self.op.secondary_ip, self.op.ndparams]
4857
    if all_mods.count(None) == len(all_mods):
4858
      raise errors.OpPrereqError("Please pass at least one modification",
4859
                                 errors.ECODE_INVAL)
4860
    if all_mods.count(True) > 1:
4861
      raise errors.OpPrereqError("Can't set the node into more than one"
4862
                                 " state at the same time",
4863
                                 errors.ECODE_INVAL)
4864

    
4865
    # Boolean value that tells us whether we might be demoting from MC
4866
    self.might_demote = (self.op.master_candidate == False or
4867
                         self.op.offline == True or
4868
                         self.op.drained == True or
4869
                         self.op.master_capable == False)
4870

    
4871
    if self.op.secondary_ip:
4872
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4873
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4874
                                   " address" % self.op.secondary_ip,
4875
                                   errors.ECODE_INVAL)
4876

    
4877
    self.lock_all = self.op.auto_promote and self.might_demote
4878
    self.lock_instances = self.op.secondary_ip is not None
4879

    
4880
  def ExpandNames(self):
4881
    if self.lock_all:
4882
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4883
    else:
4884
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4885

    
4886
    if self.lock_instances:
4887
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4888

    
4889
  def DeclareLocks(self, level):
4890
    # If we have locked all instances, before waiting to lock nodes, release
4891
    # all the ones living on nodes unrelated to the current operation.
4892
    if level == locking.LEVEL_NODE and self.lock_instances:
4893
      self.affected_instances = []
4894
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4895
        instances_keep = []
4896

    
4897
        # Build list of instances to release
4898
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4899
          instance = self.context.cfg.GetInstanceInfo(instance_name)
4900
          if (instance.disk_template in constants.DTS_INT_MIRROR and
4901
              self.op.node_name in instance.all_nodes):
4902
            instances_keep.append(instance_name)
4903
            self.affected_instances.append(instance)
4904

    
4905
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4906

    
4907
        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4908
                set(instances_keep))
4909

    
4910
  def BuildHooksEnv(self):
4911
    """Build hooks env.
4912

4913
    This runs on the master node.
4914

4915
    """
4916
    return {
4917
      "OP_TARGET": self.op.node_name,
4918
      "MASTER_CANDIDATE": str(self.op.master_candidate),
4919
      "OFFLINE": str(self.op.offline),
4920
      "DRAINED": str(self.op.drained),
4921
      "MASTER_CAPABLE": str(self.op.master_capable),
4922
      "VM_CAPABLE": str(self.op.vm_capable),
4923
      }
4924

    
4925
  def BuildHooksNodes(self):
4926
    """Build hooks nodes.
4927

4928
    """
4929
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
4930
    return (nl, nl)
4931

    
4932
  def CheckPrereq(self):
4933
    """Check prerequisites.
4934

4935
    This only checks the instance list against the existing names.
4936

4937
    """
4938
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4939

    
4940
    if (self.op.master_candidate is not None or
4941
        self.op.drained is not None or
4942
        self.op.offline is not None):
4943
      # we can't change the master's node flags
4944
      if self.op.node_name == self.cfg.GetMasterNode():
4945
        raise errors.OpPrereqError("The master role can be changed"
4946
                                   " only via master-failover",
4947
                                   errors.ECODE_INVAL)
4948

    
4949
    if self.op.master_candidate and not node.master_capable:
4950
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4951
                                 " it a master candidate" % node.name,
4952
                                 errors.ECODE_STATE)
4953

    
4954
    if self.op.vm_capable == False:
4955
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4956
      if ipri or isec:
4957
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4958
                                   " the vm_capable flag" % node.name,
4959
                                   errors.ECODE_STATE)
4960

    
4961
    if node.master_candidate and self.might_demote and not self.lock_all:
4962
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
4963
      # check if after removing the current node, we're missing master
4964
      # candidates
4965
      (mc_remaining, mc_should, _) = \
4966
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4967
      if mc_remaining < mc_should:
4968
        raise errors.OpPrereqError("Not enough master candidates, please"
4969
                                   " pass auto promote option to allow"
4970
                                   " promotion", errors.ECODE_STATE)
4971

    
4972
    self.old_flags = old_flags = (node.master_candidate,
4973
                                  node.drained, node.offline)
4974
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4975
    self.old_role = old_role = self._F2R[old_flags]
4976

    
4977
    # Check for ineffective changes
4978
    for attr in self._FLAGS:
4979
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4980
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4981
        setattr(self.op, attr, None)
4982

    
4983
    # Past this point, any flag change to False means a transition
4984
    # away from the respective state, as only real changes are kept
4985

    
4986
    # TODO: We might query the real power state if it supports OOB
4987
    if _SupportsOob(self.cfg, node):
4988
      if self.op.offline is False and not (node.powered or
4989
                                           self.op.powered == True):
4990
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4991
                                    " offline status can be reset") %
4992
                                   self.op.node_name)
4993
    elif self.op.powered is not None:
4994
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
4995
                                  " as it does not support out-of-band"
4996
                                  " handling") % self.op.node_name)
4997

    
4998
    # If we're being de-offlined or un-drained, we'll promote ourselves
    # to master candidate if needed
4999
    if (self.op.drained == False or self.op.offline == False or
5000
        (self.op.master_capable and not node.master_capable)):
5001
      if _DecideSelfPromotion(self):
5002
        self.op.master_candidate = True
5003
        self.LogInfo("Auto-promoting node to master candidate")
5004

    
5005
    # If we're no longer master capable, we'll demote ourselves from MC
5006
    if self.op.master_capable == False and node.master_candidate:
5007
      self.LogInfo("Demoting from master candidate")
5008
      self.op.master_candidate = False
5009

    
5010
    # Compute new role
5011
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5012
    if self.op.master_candidate:
5013
      new_role = self._ROLE_CANDIDATE
5014
    elif self.op.drained:
5015
      new_role = self._ROLE_DRAINED
5016
    elif self.op.offline:
5017
      new_role = self._ROLE_OFFLINE
5018
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5019
      # False is still in new flags, which means we're un-setting (the
5020
      # only) True flag
5021
      new_role = self._ROLE_REGULAR
5022
    else: # no new flags, nothing, keep old role
5023
      new_role = old_role
5024

    
5025
    self.new_role = new_role
5026

    
5027
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5028
      # Trying to transition out of offline status
5029
      result = self.rpc.call_version([node.name])[node.name]
5030
      if result.fail_msg:
5031
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5032
                                   " to report its version: %s" %
5033
                                   (node.name, result.fail_msg),
5034
                                   errors.ECODE_STATE)
5035
      else:
5036
        self.LogWarning("Transitioning node from offline to online state"
5037
                        " without using re-add. Please make sure the node"
5038
                        " is healthy!")
5039

    
5040
    if self.op.secondary_ip:
5041
      # Ok even without locking, because this can't be changed by any LU
5042
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5043
      master_singlehomed = master.secondary_ip == master.primary_ip
5044
      if master_singlehomed and self.op.secondary_ip:
5045
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5046
                                   " homed cluster", errors.ECODE_INVAL)
5047

    
5048
      if node.offline:
5049
        if self.affected_instances:
5050
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5051
                                     " node has instances (%s) configured"
5052
                                     " to use it" % self.affected_instances)
5053
      else:
5054
        # On online nodes, check that no instances are running, and that
5055
        # the node has the new ip and we can reach it.
5056
        for instance in self.affected_instances:
5057
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5058

    
5059
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5060
        if master.name != node.name:
5061
          # check reachability from master secondary ip to new secondary ip
5062
          if not netutils.TcpPing(self.op.secondary_ip,
5063
                                  constants.DEFAULT_NODED_PORT,
5064
                                  source=master.secondary_ip):
5065
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5066
                                       " based ping to node daemon port",
5067
                                       errors.ECODE_ENVIRON)
5068

    
5069
    if self.op.ndparams:
5070
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5071
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5072
      self.new_ndparams = new_ndparams
5073

    
5074
  def Exec(self, feedback_fn):
5075
    """Modifies a node.
5076

5077
    """
5078
    node = self.node
5079
    old_role = self.old_role
5080
    new_role = self.new_role
5081

    
5082
    result = []
5083

    
5084
    if self.op.ndparams:
5085
      node.ndparams = self.new_ndparams
5086

    
5087
    if self.op.powered is not None:
5088
      node.powered = self.op.powered
5089

    
5090
    for attr in ["master_capable", "vm_capable"]:
5091
      val = getattr(self.op, attr)
5092
      if val is not None:
5093
        setattr(node, attr, val)
5094
        result.append((attr, str(val)))
5095

    
5096
    if new_role != old_role:
5097
      # Tell the node to demote itself, if no longer MC and not offline
5098
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5099
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5100
        if msg:
5101
          self.LogWarning("Node failed to demote itself: %s", msg)
5102

    
5103
      new_flags = self._R2F[new_role]
5104
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5105
        if of != nf:
5106
          result.append((desc, str(nf)))
5107
      (node.master_candidate, node.drained, node.offline) = new_flags
5108

    
5109
      # we locked all nodes, we adjust the CP before updating this node
5110
      if self.lock_all:
5111
        _AdjustCandidatePool(self, [node.name])
5112

    
5113
    if self.op.secondary_ip:
5114
      node.secondary_ip = self.op.secondary_ip
5115
      result.append(("secondary_ip", self.op.secondary_ip))
5116

    
5117
    # this will trigger configuration file update, if needed
5118
    self.cfg.Update(node, feedback_fn)
5119

    
5120
    # this will trigger job queue propagation or cleanup if the mc
5121
    # flag changed
5122
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5123
      self.context.ReaddNode(node)
5124

    
5125
    return result
5126

    
5127

    
5128
class LUNodePowercycle(NoHooksLU):
5129
  """Powercycles a node.
5130

5131
  """
5132
  REQ_BGL = False
5133

    
5134
  def CheckArguments(self):
5135
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5136
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5137
      raise errors.OpPrereqError("The node is the master and the force"
5138
                                 " parameter was not set",
5139
                                 errors.ECODE_INVAL)
5140

    
5141
  def ExpandNames(self):
5142
    """Locking for PowercycleNode.
5143

5144
    This is a last-resort option and shouldn't block on other
5145
    jobs. Therefore, we grab no locks.
5146

5147
    """
5148
    self.needed_locks = {}
5149

    
5150
  def Exec(self, feedback_fn):
5151
    """Reboots a node.
5152

5153
    """
5154
    result = self.rpc.call_node_powercycle(self.op.node_name,
5155
                                           self.cfg.GetHypervisorType())
5156
    result.Raise("Failed to schedule the reboot")
5157
    return result.payload
5158

    
5159

    
5160
class LUClusterQuery(NoHooksLU):
5161
  """Query cluster configuration.
5162

5163
  """
5164
  REQ_BGL = False
5165

    
5166
  def ExpandNames(self):
5167
    self.needed_locks = {}
5168

    
5169
  def Exec(self, feedback_fn):
5170
    """Return cluster config.
5171

5172
    """
5173
    cluster = self.cfg.GetClusterInfo()
5174
    os_hvp = {}
5175

    
5176
    # Filter just for enabled hypervisors
5177
    for os_name, hv_dict in cluster.os_hvp.items():
5178
      os_hvp[os_name] = {}
5179
      for hv_name, hv_params in hv_dict.items():
5180
        if hv_name in cluster.enabled_hypervisors:
5181
          os_hvp[os_name][hv_name] = hv_params
5182

    
5183
    # Convert ip_family to ip_version
5184
    primary_ip_version = constants.IP4_VERSION
5185
    if cluster.primary_ip_family == netutils.IP6Address.family:
5186
      primary_ip_version = constants.IP6_VERSION
5187

    
5188
    result = {
5189
      "software_version": constants.RELEASE_VERSION,
5190
      "protocol_version": constants.PROTOCOL_VERSION,
5191
      "config_version": constants.CONFIG_VERSION,
5192
      "os_api_version": max(constants.OS_API_VERSIONS),
5193
      "export_version": constants.EXPORT_VERSION,
5194
      "architecture": (platform.architecture()[0], platform.machine()),
5195
      "name": cluster.cluster_name,
5196
      "master": cluster.master_node,
5197
      "default_hypervisor": cluster.enabled_hypervisors[0],
5198
      "enabled_hypervisors": cluster.enabled_hypervisors,
5199
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5200
                        for hypervisor_name in cluster.enabled_hypervisors]),
5201
      "os_hvp": os_hvp,
5202
      "beparams": cluster.beparams,
5203
      "osparams": cluster.osparams,
5204
      "nicparams": cluster.nicparams,
5205
      "ndparams": cluster.ndparams,
5206
      "candidate_pool_size": cluster.candidate_pool_size,
5207
      "master_netdev": cluster.master_netdev,
5208
      "volume_group_name": cluster.volume_group_name,
5209
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5210
      "file_storage_dir": cluster.file_storage_dir,
5211
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5212
      "maintain_node_health": cluster.maintain_node_health,
5213
      "ctime": cluster.ctime,
5214
      "mtime": cluster.mtime,
5215
      "uuid": cluster.uuid,
5216
      "tags": list(cluster.GetTags()),
5217
      "uid_pool": cluster.uid_pool,
5218
      "default_iallocator": cluster.default_iallocator,
5219
      "reserved_lvs": cluster.reserved_lvs,
5220
      "primary_ip_version": primary_ip_version,
5221
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5222
      "hidden_os": cluster.hidden_os,
5223
      "blacklisted_os": cluster.blacklisted_os,
5224
      }
5225

    
5226
    return result
5227

    
5228

    
5229
class LUClusterConfigQuery(NoHooksLU):
5230
  """Return configuration values.
5231

5232
  """
5233
  REQ_BGL = False
5234
  _FIELDS_DYNAMIC = utils.FieldSet()
5235
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5236
                                  "watcher_pause", "volume_group_name")
5237

    
5238
  def CheckArguments(self):
5239
    _CheckOutputFields(static=self._FIELDS_STATIC,
5240
                       dynamic=self._FIELDS_DYNAMIC,
5241
                       selected=self.op.output_fields)
5242

    
5243
  def ExpandNames(self):
5244
    self.needed_locks = {}
5245

    
5246
  def Exec(self, feedback_fn):
5247
    """Dump a representation of the cluster config to the standard output.
5248

5249
    """
5250
    values = []
5251
    for field in self.op.output_fields:
5252
      if field == "cluster_name":
5253
        entry = self.cfg.GetClusterName()
5254
      elif field == "master_node":
5255
        entry = self.cfg.GetMasterNode()
5256
      elif field == "drain_flag":
5257
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5258
      elif field == "watcher_pause":
5259
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5260
      elif field == "volume_group_name":
5261
        entry = self.cfg.GetVGName()
5262
      else:
5263
        raise errors.ParameterError(field)
5264
      values.append(entry)
5265
    return values
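
  # Illustrative sketch (not part of the original code): the values come
  # back in the same order as the requested fields, so output_fields of
  # ["cluster_name", "master_node"] could yield
  # ["cluster.example.com", "node1.example.com"] (hypothetical values).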
5266

    
5267

    
5268
class LUInstanceActivateDisks(NoHooksLU):
5269
  """Bring up an instance's disks.
5270

5271
  """
5272
  REQ_BGL = False
5273

    
5274
  def ExpandNames(self):
5275
    self._ExpandAndLockInstance()
5276
    self.needed_locks[locking.LEVEL_NODE] = []
5277
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5278

    
5279
  def DeclareLocks(self, level):
5280
    if level == locking.LEVEL_NODE:
5281
      self._LockInstancesNodes()
5282

    
5283
  def CheckPrereq(self):
5284
    """Check prerequisites.
5285

5286
    This checks that the instance is in the cluster.
5287

5288
    """
5289
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5290
    assert self.instance is not None, \
5291
      "Cannot retrieve locked instance %s" % self.op.instance_name
5292
    _CheckNodeOnline(self, self.instance.primary_node)
5293

    
5294
  def Exec(self, feedback_fn):
5295
    """Activate the disks.
5296

5297
    """
5298
    disks_ok, disks_info = \
5299
              _AssembleInstanceDisks(self, self.instance,
5300
                                     ignore_size=self.op.ignore_size)
5301
    if not disks_ok:
5302
      raise errors.OpExecError("Cannot activate block devices")
5303

    
5304
    return disks_info
5305

    
5306

    
5307
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5308
                           ignore_size=False):
5309
  """Prepare the block devices for an instance.
5310

5311
  This sets up the block devices on all nodes.
5312

5313
  @type lu: L{LogicalUnit}
5314
  @param lu: the logical unit on whose behalf we execute
5315
  @type instance: L{objects.Instance}
5316
  @param instance: the instance for whose disks we assemble
5317
  @type disks: list of L{objects.Disk} or None
5318
  @param disks: which disks to assemble (or all, if None)
5319
  @type ignore_secondaries: boolean
5320
  @param ignore_secondaries: if true, errors on secondary nodes
5321
      won't result in an error return from the function
5322
  @type ignore_size: boolean
5323
  @param ignore_size: if true, the current known size of the disk
5324
      will not be used during the disk activation, useful for cases
5325
      when the size is wrong
5326
  @return: a (disks_ok, device_info) tuple; disks_ok is False if the
      operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples with the
      mapping from node devices to instance devices
5329

5330
  """
5331
  device_info = []
5332
  disks_ok = True
5333
  iname = instance.name
5334
  disks = _ExpandCheckDisks(instance, disks)
5335

    
5336
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking has occurred, but we do not eliminate it
5339

    
5340
  # The proper fix would be to wait (with some limits) until the
5341
  # connection has been made and drbd transitions from WFConnection
5342
  # into any other network-connected state (Connected, SyncTarget,
5343
  # SyncSource, etc.)
5344

    
5345
  # 1st pass, assemble on all nodes in secondary mode
5346
  for idx, inst_disk in enumerate(disks):
5347
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5348
      if ignore_size:
5349
        node_disk = node_disk.Copy()
5350
        node_disk.UnsetSize()
5351
      lu.cfg.SetDiskID(node_disk, node)
5352
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5353
      msg = result.fail_msg
5354
      if msg:
5355
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5356
                           " (is_primary=False, pass=1): %s",
5357
                           inst_disk.iv_name, node, msg)
5358
        if not ignore_secondaries:
5359
          disks_ok = False
5360

    
5361
  # FIXME: race condition on drbd migration to primary
5362

    
5363
  # 2nd pass, do only the primary node
5364
  for idx, inst_disk in enumerate(disks):
5365
    dev_path = None
5366

    
5367
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5368
      if node != instance.primary_node:
5369
        continue
5370
      if ignore_size:
5371
        node_disk = node_disk.Copy()
5372
        node_disk.UnsetSize()
5373
      lu.cfg.SetDiskID(node_disk, node)
5374
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5375
      msg = result.fail_msg
5376
      if msg:
5377
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5378
                           " (is_primary=True, pass=2): %s",
5379
                           inst_disk.iv_name, node, msg)
5380
        disks_ok = False
5381
      else:
5382
        dev_path = result.payload
5383

    
5384
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5385

    
5386
  # leave the disks configured for the primary node
5387
  # this is a workaround that would be fixed better by
5388
  # improving the logical/physical id handling
5389
  for disk in disks:
5390
    lu.cfg.SetDiskID(disk, instance.primary_node)
5391

    
5392
  return disks_ok, device_info
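
# Illustrative usage sketch (not part of the original code), mirroring the
# call in LUInstanceActivateDisks.Exec above:
#   disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
# The function always returns a (disks_ok, device_info) pair; device_info
# could look like [("node1.example.com", "disk/0", "/dev/drbd0")], with
# hypothetical node name and device path.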
5393

    
5394

    
5395
def _StartInstanceDisks(lu, instance, force):
5396
  """Start the disks of an instance.
5397

5398
  """
5399
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5400
                                           ignore_secondaries=force)
5401
  if not disks_ok:
5402
    _ShutdownInstanceDisks(lu, instance)
5403
    if force is not None and not force:
5404
      lu.proc.LogWarning("", hint="If the message above refers to a"
5405
                         " secondary node,"
5406
                         " you can retry the operation using '--force'.")
5407
    raise errors.OpExecError("Disk consistency error")
5408

    
5409

    
5410
class LUInstanceDeactivateDisks(NoHooksLU):
5411
  """Shutdown an instance's disks.
5412

5413
  """
5414
  REQ_BGL = False
5415

    
5416
  def ExpandNames(self):
5417
    self._ExpandAndLockInstance()
5418
    self.needed_locks[locking.LEVEL_NODE] = []
5419
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5420

    
5421
  def DeclareLocks(self, level):
5422
    if level == locking.LEVEL_NODE:
5423
      self._LockInstancesNodes()
5424

    
5425
  def CheckPrereq(self):
5426
    """Check prerequisites.
5427

5428
    This checks that the instance is in the cluster.
5429

5430
    """
5431
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5432
    assert self.instance is not None, \
5433
      "Cannot retrieve locked instance %s" % self.op.instance_name
5434

    
5435
  def Exec(self, feedback_fn):
5436
    """Deactivate the disks
5437

5438
    """
5439
    instance = self.instance
5440
    if self.op.force:
5441
      _ShutdownInstanceDisks(self, instance)
5442
    else:
5443
      _SafeShutdownInstanceDisks(self, instance)
5444

    
5445

    
5446
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5447
  """Shutdown block devices of an instance.
5448

5449
  This function checks if an instance is running, before calling
5450
  _ShutdownInstanceDisks.
5451

5452
  """
5453
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5454
  _ShutdownInstanceDisks(lu, instance, disks=disks)
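
# Illustrative sketch (not part of the original code): unlike the plain
# _ShutdownInstanceDisks below, this wrapper refuses to act on a running
# instance; _CheckInstanceDown raises an error in that case, so the disks of
# a live instance are never torn down through this path.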
5455

    
5456

    
5457
def _ExpandCheckDisks(instance, disks):
5458
  """Return the instance disks selected by the disks list
5459

5460
  @type disks: list of L{objects.Disk} or None
5461
  @param disks: selected disks
5462
  @rtype: list of L{objects.Disk}
5463
  @return: selected instance disks to act on
5464

5465
  """
5466
  if disks is None:
5467
    return instance.disks
5468
  else:
5469
    if not set(disks).issubset(instance.disks):
5470
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5471
                                   " target instance")
5472
    return disks
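
# Illustrative sketch (not part of the original code):
#   _ExpandCheckDisks(instance, None) simply returns instance.disks, while
#   passing an explicit subset of instance.disks returns just that subset; a
#   disk that does not belong to the instance triggers a ProgrammerError.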
5473

    
5474

    
5475
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5476
  """Shutdown block devices of an instance.
5477

5478
  This does the shutdown on all nodes of the instance.
5479

5480
  Errors on the primary node are ignored only if ignore_primary is
  true; errors on a secondary node are ignored only if that node is
  marked offline.
5482

5483
  """
5484
  all_result = True
5485
  disks = _ExpandCheckDisks(instance, disks)
5486

    
5487
  for disk in disks:
5488
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5489
      lu.cfg.SetDiskID(top_disk, node)
5490
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5491
      msg = result.fail_msg
5492
      if msg:
5493
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5494
                      disk.iv_name, node, msg)
5495
        if ((node == instance.primary_node and not ignore_primary) or
5496
            (node != instance.primary_node and not result.offline)):
5497
          all_result = False
5498
  return all_result
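
# Illustrative usage sketch (not part of the original code): callers only get
# a boolean back and decide themselves how to react, e.g.
#   if not _ShutdownInstanceDisks(self, instance):
#     self.LogWarning("Some disks failed to shut down")
# where the warning text is hypothetical.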
5499

    
5500

    
5501
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5502
  """Checks if a node has enough free memory.
5503

5504
  This function checks if a given node has the needed amount of free
  memory. If the node has less memory, or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5508

5509
  @type lu: C{LogicalUnit}
5510
  @param lu: a logical unit from which we get configuration data
5511
  @type node: C{str}
5512
  @param node: the node to check
5513
  @type reason: C{str}
5514
  @param reason: string to use in the error message
5515
  @type requested: C{int}
5516
  @param requested: the amount of memory in MiB to check for
5517
  @type hypervisor_name: C{str}
5518
  @param hypervisor_name: the hypervisor to ask for memory stats
5519
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5520
      we cannot check the node
5521

5522
  """
5523
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5524
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5525
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5526
  free_mem = nodeinfo[node].payload.get('memory_free', None)
5527
  if not isinstance(free_mem, int):
5528
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5529
                               " was '%s'" % (node, free_mem),
5530
                               errors.ECODE_ENVIRON)
5531
  if requested > free_mem:
5532
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5533
                               " needed %s MiB, available %s MiB" %
5534
                               (node, reason, requested, free_mem),
5535
                               errors.ECODE_NORES)
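
# Illustrative usage sketch (not part of the original code), matching the
# call in LUInstanceStartup.CheckPrereq further below:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# i.e. the caller passes the required memory in MiB plus a short reason
# string that ends up in the error message.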
5536

    
5537

    
5538
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5539
  """Checks if nodes have enough free disk space in the all VGs.
5540

5541
  This function checks if all given nodes have the needed amount of
  free disk. If any node has less disk, or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5545

5546
  @type lu: C{LogicalUnit}
5547
  @param lu: a logical unit from which we get configuration data
5548
  @type nodenames: C{list}
5549
  @param nodenames: the list of node names to check
5550
  @type req_sizes: C{dict}
5551
  @param req_sizes: the hash of vg and corresponding amount of disk in
5552
      MiB to check for
5553
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5554
      or we cannot check the node
5555

5556
  """
5557
  for vg, req_size in req_sizes.items():
5558
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
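
# Illustrative sketch (not part of the original code): req_sizes maps volume
# group names to the space needed in MiB, so a hypothetical
#   _CheckNodesFreeDiskPerVG(self, ["node1", "node2"], {"xenvg": 10240})
# verifies that both nodes have at least 10 GiB free in the "xenvg" VG.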
5559

    
5560

    
5561
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5562
  """Checks if nodes have enough free disk space in the specified VG.
5563

5564
  This function checks if all given nodes have the needed amount of
  free disk. If any node has less disk, or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5568

5569
  @type lu: C{LogicalUnit}
5570
  @param lu: a logical unit from which we get configuration data
5571
  @type nodenames: C{list}
5572
  @param nodenames: the list of node names to check
5573
  @type vg: C{str}
5574
  @param vg: the volume group to check
5575
  @type requested: C{int}
5576
  @param requested: the amount of disk in MiB to check for
5577
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5578
      or we cannot check the node
5579

5580
  """
5581
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5582
  for node in nodenames:
5583
    info = nodeinfo[node]
5584
    info.Raise("Cannot get current information from node %s" % node,
5585
               prereq=True, ecode=errors.ECODE_ENVIRON)
5586
    vg_free = info.payload.get("vg_free", None)
5587
    if not isinstance(vg_free, int):
5588
      raise errors.OpPrereqError("Can't compute free disk space on node"
5589
                                 " %s for vg %s, result was '%s'" %
5590
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5591
    if requested > vg_free:
5592
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5593
                                 " vg %s: required %d MiB, available %d MiB" %
5594
                                 (node, vg, requested, vg_free),
5595
                                 errors.ECODE_NORES)
5596

    
5597

    
5598
class LUInstanceStartup(LogicalUnit):
5599
  """Starts an instance.
5600

5601
  """
5602
  HPATH = "instance-start"
5603
  HTYPE = constants.HTYPE_INSTANCE
5604
  REQ_BGL = False
5605

    
5606
  def CheckArguments(self):
5607
    # extra beparams
5608
    if self.op.beparams:
5609
      # fill the beparams dict
5610
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
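
      # A sketch of the intent (an assumption based on how Exec below uses
      # self.op.beparams): an override such as
      #   beparams={constants.BE_MEMORY: 1024}
      # is only type-checked here and later passed straight to
      # call_instance_start, i.e. it acts as a one-off override for this
      # startup rather than a permanent change to the instance configuration.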
5611

    
5612
  def ExpandNames(self):
5613
    self._ExpandAndLockInstance()
5614

    
5615
  def BuildHooksEnv(self):
5616
    """Build hooks env.
5617

5618
    This runs on master, primary and secondary nodes of the instance.
5619

5620
    """
5621
    env = {
5622
      "FORCE": self.op.force,
5623
      }
5624

    
5625
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5626

    
5627
    return env
5628

    
5629
  def BuildHooksNodes(self):
5630
    """Build hooks nodes.
5631

5632
    """
5633
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5634
    return (nl, nl)
5635

    
5636
  def CheckPrereq(self):
5637
    """Check prerequisites.
5638

5639
    This checks that the instance is in the cluster.
5640

5641
    """
5642
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5643
    assert self.instance is not None, \
5644
      "Cannot retrieve locked instance %s" % self.op.instance_name
5645

    
5646
    # extra hvparams
5647
    if self.op.hvparams:
5648
      # check hypervisor parameter syntax (locally)
5649
      cluster = self.cfg.GetClusterInfo()
5650
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5651
      filled_hvp = cluster.FillHV(instance)
5652
      filled_hvp.update(self.op.hvparams)
5653
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5654
      hv_type.CheckParameterSyntax(filled_hvp)
5655
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5656

    
5657
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5658

    
5659
    if self.primary_offline and self.op.ignore_offline_nodes:
5660
      self.proc.LogWarning("Ignoring offline primary node")
5661

    
5662
      if self.op.hvparams or self.op.beparams:
5663
        self.proc.LogWarning("Overridden parameters are ignored")
5664
    else:
5665
      _CheckNodeOnline(self, instance.primary_node)
5666

    
5667
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5668

    
5669
      # check bridges existence
5670
      _CheckInstanceBridgesExist(self, instance)
5671

    
5672
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5673
                                                instance.name,
5674
                                                instance.hypervisor)
5675
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5676
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5677
      if not remote_info.payload: # not running already
5678
        _CheckNodeFreeMemory(self, instance.primary_node,
5679
                             "starting instance %s" % instance.name,
5680
                             bep[constants.BE_MEMORY], instance.hypervisor)
5681

    
5682
  def Exec(self, feedback_fn):
5683
    """Start the instance.
5684

5685
    """
5686
    instance = self.instance
5687
    force = self.op.force
5688

    
5689
    if not self.op.no_remember:
5690
      self.cfg.MarkInstanceUp(instance.name)
5691

    
5692
    if self.primary_offline:
5693
      assert self.op.ignore_offline_nodes
5694
      self.proc.LogInfo("Primary node offline, marked instance as started")
5695
    else:
5696
      node_current = instance.primary_node
5697

    
5698
      _StartInstanceDisks(self, instance, force)
5699

    
5700
      result = self.rpc.call_instance_start(node_current, instance,
5701
                                            self.op.hvparams, self.op.beparams)
5702
      msg = result.fail_msg
5703
      if msg:
5704
        _ShutdownInstanceDisks(self, instance)
5705
        raise errors.OpExecError("Could not start instance: %s" % msg)
5706

    
5707

    
5708
class LUInstanceReboot(LogicalUnit):
5709
  """Reboot an instance.
5710

5711
  """
5712
  HPATH = "instance-reboot"
5713
  HTYPE = constants.HTYPE_INSTANCE
5714
  REQ_BGL = False
5715

    
5716
  def ExpandNames(self):
5717
    self._ExpandAndLockInstance()
5718

    
5719
  def BuildHooksEnv(self):
5720
    """Build hooks env.
5721

5722
    This runs on master, primary and secondary nodes of the instance.
5723

5724
    """
5725
    env = {
5726
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5727
      "REBOOT_TYPE": self.op.reboot_type,
5728
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5729
      }
5730

    
5731
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5732

    
5733
    return env
5734

    
5735
  def BuildHooksNodes(self):
5736
    """Build hooks nodes.
5737

5738
    """
5739
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5740
    return (nl, nl)
5741

    
5742
  def CheckPrereq(self):
5743
    """Check prerequisites.
5744

5745
    This checks that the instance is in the cluster.
5746

5747
    """
5748
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5749
    assert self.instance is not None, \
5750
      "Cannot retrieve locked instance %s" % self.op.instance_name
5751

    
5752
    _CheckNodeOnline(self, instance.primary_node)
5753

    
5754
    # check bridges existence
5755
    _CheckInstanceBridgesExist(self, instance)
5756

    
5757
  def Exec(self, feedback_fn):
5758
    """Reboot the instance.
5759

5760
    """
5761
    instance = self.instance
5762
    ignore_secondaries = self.op.ignore_secondaries
5763
    reboot_type = self.op.reboot_type
5764

    
5765
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5766
                                              instance.name,
5767
                                              instance.hypervisor)
5768
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5769
    instance_running = bool(remote_info.payload)
5770

    
5771
    node_current = instance.primary_node
5772

    
5773
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5774
                                            constants.INSTANCE_REBOOT_HARD]:
5775
      for disk in instance.disks:
5776
        self.cfg.SetDiskID(disk, node_current)
5777
      result = self.rpc.call_instance_reboot(node_current, instance,
5778
                                             reboot_type,
5779
                                             self.op.shutdown_timeout)
5780
      result.Raise("Could not reboot instance")
5781
    else:
5782
      if instance_running:
5783
        result = self.rpc.call_instance_shutdown(node_current, instance,
5784
                                                 self.op.shutdown_timeout)
5785
        result.Raise("Could not shutdown instance for full reboot")
5786
        _ShutdownInstanceDisks(self, instance)
5787
      else:
5788
        self.LogInfo("Instance %s was already stopped, starting now",
5789
                     instance.name)
5790
      _StartInstanceDisks(self, instance, ignore_secondaries)
5791
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5792
      msg = result.fail_msg
5793
      if msg:
5794
        _ShutdownInstanceDisks(self, instance)
5795
        raise errors.OpExecError("Could not start instance for"
5796
                                 " full reboot: %s" % msg)
5797

    
5798
    self.cfg.MarkInstanceUp(instance.name)
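
    # A usage sketch (hypothetical values): the branch above reboots a
    # running instance through the hypervisor for soft/hard reboots, while
    # the full-reboot path shuts down the instance and its disks and starts
    # them again.  A client would submit, for example:
    #
    #   opcodes.OpInstanceReboot(instance_name="inst1.example.com",
    #                            reboot_type=constants.INSTANCE_REBOOT_HARD,
    #                            shutdown_timeout=120)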
5799

    
5800

    
5801
class LUInstanceShutdown(LogicalUnit):
5802
  """Shutdown an instance.
5803

5804
  """
5805
  HPATH = "instance-stop"
5806
  HTYPE = constants.HTYPE_INSTANCE
5807
  REQ_BGL = False
5808

    
5809
  def ExpandNames(self):
5810
    self._ExpandAndLockInstance()
5811

    
5812
  def BuildHooksEnv(self):
5813
    """Build hooks env.
5814

5815
    This runs on master, primary and secondary nodes of the instance.
5816

5817
    """
5818
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5819
    env["TIMEOUT"] = self.op.timeout
5820
    return env
5821

    
5822
  def BuildHooksNodes(self):
5823
    """Build hooks nodes.
5824

5825
    """
5826
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5827
    return (nl, nl)
5828

    
5829
  def CheckPrereq(self):
5830
    """Check prerequisites.
5831

5832
    This checks that the instance is in the cluster.
5833

5834
    """
5835
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5836
    assert self.instance is not None, \
5837
      "Cannot retrieve locked instance %s" % self.op.instance_name
5838

    
5839
    self.primary_offline = \
5840
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5841

    
5842
    if self.primary_offline and self.op.ignore_offline_nodes:
5843
      self.proc.LogWarning("Ignoring offline primary node")
5844
    else:
5845
      _CheckNodeOnline(self, self.instance.primary_node)
5846

    
5847
  def Exec(self, feedback_fn):
5848
    """Shutdown the instance.
5849

5850
    """
5851
    instance = self.instance
5852
    node_current = instance.primary_node
5853
    timeout = self.op.timeout
5854

    
5855
    if not self.op.no_remember:
5856
      self.cfg.MarkInstanceDown(instance.name)
5857

    
5858
    if self.primary_offline:
5859
      assert self.op.ignore_offline_nodes
5860
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5861
    else:
5862
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5863
      msg = result.fail_msg
5864
      if msg:
5865
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5866

    
5867
      _ShutdownInstanceDisks(self, instance)
5868

    
5869

    
5870
class LUInstanceReinstall(LogicalUnit):
5871
  """Reinstall an instance.
5872

5873
  """
5874
  HPATH = "instance-reinstall"
5875
  HTYPE = constants.HTYPE_INSTANCE
5876
  REQ_BGL = False
5877

    
5878
  def ExpandNames(self):
5879
    self._ExpandAndLockInstance()
5880

    
5881
  def BuildHooksEnv(self):
5882
    """Build hooks env.
5883

5884
    This runs on master, primary and secondary nodes of the instance.
5885

5886
    """
5887
    return _BuildInstanceHookEnvByObject(self, self.instance)
5888

    
5889
  def BuildHooksNodes(self):
5890
    """Build hooks nodes.
5891

5892
    """
5893
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5894
    return (nl, nl)
5895

    
5896
  def CheckPrereq(self):
5897
    """Check prerequisites.
5898

5899
    This checks that the instance is in the cluster and is not running.
5900

5901
    """
5902
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5903
    assert instance is not None, \
5904
      "Cannot retrieve locked instance %s" % self.op.instance_name
5905
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5906
                     " offline, cannot reinstall")
5907
    for node in instance.secondary_nodes:
5908
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5909
                       " cannot reinstall")
5910

    
5911
    if instance.disk_template == constants.DT_DISKLESS:
5912
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5913
                                 self.op.instance_name,
5914
                                 errors.ECODE_INVAL)
5915
    _CheckInstanceDown(self, instance, "cannot reinstall")
5916

    
5917
    if self.op.os_type is not None:
5918
      # OS verification
5919
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5920
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5921
      instance_os = self.op.os_type
5922
    else:
5923
      instance_os = instance.os
5924

    
5925
    nodelist = list(instance.all_nodes)
5926

    
5927
    if self.op.osparams:
5928
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5929
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5930
      self.os_inst = i_osdict # the new dict (without defaults)
5931
    else:
5932
      self.os_inst = None
5933

    
5934
    self.instance = instance
5935

    
5936
  def Exec(self, feedback_fn):
5937
    """Reinstall the instance.
5938

5939
    """
5940
    inst = self.instance
5941

    
5942
    if self.op.os_type is not None:
5943
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5944
      inst.os = self.op.os_type
5945
      # Write to configuration
5946
      self.cfg.Update(inst, feedback_fn)
5947

    
5948
    _StartInstanceDisks(self, inst, None)
5949
    try:
5950
      feedback_fn("Running the instance OS create scripts...")
5951
      # FIXME: pass debug option from opcode to backend
5952
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5953
                                             self.op.debug_level,
5954
                                             osparams=self.os_inst)
5955
      result.Raise("Could not install OS for instance %s on node %s" %
5956
                   (inst.name, inst.primary_node))
5957
    finally:
5958
      _ShutdownInstanceDisks(self, inst)
5959

    
5960

    
5961
class LUInstanceRecreateDisks(LogicalUnit):
5962
  """Recreate an instance's missing disks.
5963

5964
  """
5965
  HPATH = "instance-recreate-disks"
5966
  HTYPE = constants.HTYPE_INSTANCE
5967
  REQ_BGL = False
5968

    
5969
  def CheckArguments(self):
5970
    # normalise the disk list
5971
    self.op.disks = sorted(frozenset(self.op.disks))
5972

    
5973
  def ExpandNames(self):
5974
    self._ExpandAndLockInstance()
5975
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5976
    if self.op.nodes:
5977
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5978
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5979
    else:
5980
      self.needed_locks[locking.LEVEL_NODE] = []
5981

    
5982
  def DeclareLocks(self, level):
5983
    if level == locking.LEVEL_NODE:
5984
      # if we replace the nodes, we only need to lock the old primary,
5985
      # otherwise we need to lock all nodes for disk re-creation
5986
      primary_only = bool(self.op.nodes)
5987
      self._LockInstancesNodes(primary_only=primary_only)
5988

    
5989
  def BuildHooksEnv(self):
5990
    """Build hooks env.
5991

5992
    This runs on master, primary and secondary nodes of the instance.
5993

5994
    """
5995
    return _BuildInstanceHookEnvByObject(self, self.instance)
5996

    
5997
  def BuildHooksNodes(self):
5998
    """Build hooks nodes.
5999

6000
    """
6001
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6002
    return (nl, nl)
6003

    
6004
  def CheckPrereq(self):
6005
    """Check prerequisites.
6006

6007
    This checks that the instance is in the cluster and is not running.
6008

6009
    """
6010
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6011
    assert instance is not None, \
6012
      "Cannot retrieve locked instance %s" % self.op.instance_name
6013
    if self.op.nodes:
6014
      if len(self.op.nodes) != len(instance.all_nodes):
6015
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6016
                                   " %d replacement nodes were specified" %
6017
                                   (instance.name, len(instance.all_nodes),
6018
                                    len(self.op.nodes)),
6019
                                   errors.ECODE_INVAL)
6020
      assert instance.disk_template != constants.DT_DRBD8 or \
6021
          len(self.op.nodes) == 2
6022
      assert instance.disk_template != constants.DT_PLAIN or \
6023
          len(self.op.nodes) == 1
6024
      primary_node = self.op.nodes[0]
6025
    else:
6026
      primary_node = instance.primary_node
6027
    _CheckNodeOnline(self, primary_node)
6028

    
6029
    if instance.disk_template == constants.DT_DISKLESS:
6030
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6031
                                 self.op.instance_name, errors.ECODE_INVAL)
6032
    # if we replace nodes *and* the old primary is offline, we don't
6033
    # check that the instance is stopped
6034
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6035
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6036
    if not (self.op.nodes and old_pnode.offline):
6037
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6038

    
6039
    if not self.op.disks:
6040
      self.op.disks = range(len(instance.disks))
6041
    else:
6042
      for idx in self.op.disks:
6043
        if idx >= len(instance.disks):
6044
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6045
                                     errors.ECODE_INVAL)
6046
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6047
      raise errors.OpPrereqError("Can't recreate disks partially and"
6048
                                 " change the nodes at the same time",
6049
                                 errors.ECODE_INVAL)
6050
    self.instance = instance
6051

    
6052
  def Exec(self, feedback_fn):
6053
    """Recreate the disks.
6054

6055
    """
6056
    # change primary node, if needed
6057
    if self.op.nodes:
6058
      self.instance.primary_node = self.op.nodes[0]
6059
      self.LogWarning("Changing the instance's nodes, you will have to"
6060
                      " remove any disks left on the older nodes manually")
6061

    
6062
    to_skip = []
6063
    for idx, disk in enumerate(self.instance.disks):
6064
      if idx not in self.op.disks: # disk idx has not been passed in
6065
        to_skip.append(idx)
6066
        continue
6067
      # update secondaries for disks, if needed
6068
      if self.op.nodes:
6069
        if disk.dev_type == constants.LD_DRBD8:
6070
          # need to update the nodes
6071
          assert len(self.op.nodes) == 2
6072
          logical_id = list(disk.logical_id)
6073
          logical_id[0] = self.op.nodes[0]
6074
          logical_id[1] = self.op.nodes[1]
6075
          disk.logical_id = tuple(logical_id)
6076

    
6077
    if self.op.nodes:
6078
      self.cfg.Update(self.instance, feedback_fn)
6079

    
6080
    _CreateDisks(self, self.instance, to_skip=to_skip)
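
    # A usage sketch (hypothetical values): recreating only disk 0 in place,
    # or recreating all disks on a fresh node pair, would be requested as
    #
    #   opcodes.OpInstanceRecreateDisks(instance_name="inst1", disks=[0])
    #   opcodes.OpInstanceRecreateDisks(instance_name="inst1", disks=[],
    #                                   nodes=["nodeA", "nodeB"])
    #
    # CheckPrereq above rejects mixing a partial disk list with new nodes.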
6081

    
6082

    
6083
class LUInstanceRename(LogicalUnit):
6084
  """Rename an instance.
6085

6086
  """
6087
  HPATH = "instance-rename"
6088
  HTYPE = constants.HTYPE_INSTANCE
6089

    
6090
  def CheckArguments(self):
6091
    """Check arguments.
6092

6093
    """
6094
    if self.op.ip_check and not self.op.name_check:
6095
      # TODO: make the ip check more flexible and not depend on the name check
6096
      raise errors.OpPrereqError("IP address check requires a name check",
6097
                                 errors.ECODE_INVAL)
6098

    
6099
  def BuildHooksEnv(self):
6100
    """Build hooks env.
6101

6102
    This runs on master, primary and secondary nodes of the instance.
6103

6104
    """
6105
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6106
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6107
    return env
6108

    
6109
  def BuildHooksNodes(self):
6110
    """Build hooks nodes.
6111

6112
    """
6113
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6114
    return (nl, nl)
6115

    
6116
  def CheckPrereq(self):
6117
    """Check prerequisites.
6118

6119
    This checks that the instance is in the cluster and is not running.
6120

6121
    """
6122
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6123
                                                self.op.instance_name)
6124
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6125
    assert instance is not None
6126
    _CheckNodeOnline(self, instance.primary_node)
6127
    _CheckInstanceDown(self, instance, "cannot rename")
6128
    self.instance = instance
6129

    
6130
    new_name = self.op.new_name
6131
    if self.op.name_check:
6132
      hostname = netutils.GetHostname(name=new_name)
6133
      if hostname != new_name:
6134
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6135
                     hostname.name)
6136
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6137
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6138
                                    " same as given hostname '%s'") %
6139
                                    (hostname.name, self.op.new_name),
6140
                                    errors.ECODE_INVAL)
6141
      new_name = self.op.new_name = hostname.name
6142
      if (self.op.ip_check and
6143
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6144
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6145
                                   (hostname.ip, new_name),
6146
                                   errors.ECODE_NOTUNIQUE)
6147

    
6148
    instance_list = self.cfg.GetInstanceList()
6149
    if new_name in instance_list and new_name != instance.name:
6150
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6151
                                 new_name, errors.ECODE_EXISTS)
6152

    
6153
  def Exec(self, feedback_fn):
6154
    """Rename the instance.
6155

6156
    """
6157
    inst = self.instance
6158
    old_name = inst.name
6159

    
6160
    rename_file_storage = False
6161
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
6162
        self.op.new_name != inst.name):
6163
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6164
      rename_file_storage = True
6165

    
6166
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6167
    # Change the instance lock. This is definitely safe while we hold the BGL.
6168
    # Otherwise the new lock would have to be added in acquired mode.
6169
    assert self.REQ_BGL
6170
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6171
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6172

    
6173
    # re-read the instance from the configuration after rename
6174
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6175

    
6176
    if rename_file_storage:
6177
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6178
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6179
                                                     old_file_storage_dir,
6180
                                                     new_file_storage_dir)
6181
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6182
                   " (but the instance has been renamed in Ganeti)" %
6183
                   (inst.primary_node, old_file_storage_dir,
6184
                    new_file_storage_dir))
6185

    
6186
    _StartInstanceDisks(self, inst, None)
6187
    try:
6188
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6189
                                                 old_name, self.op.debug_level)
6190
      msg = result.fail_msg
6191
      if msg:
6192
        msg = ("Could not run OS rename script for instance %s on node %s"
6193
               " (but the instance has been renamed in Ganeti): %s" %
6194
               (inst.name, inst.primary_node, msg))
6195
        self.proc.LogWarning(msg)
6196
    finally:
6197
      _ShutdownInstanceDisks(self, inst)
6198

    
6199
    return inst.name
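
    # A usage sketch (hypothetical names): a rename request looks like
    #
    #   opcodes.OpInstanceRename(instance_name="old.example.com",
    #                            new_name="new.example.com",
    #                            name_check=True, ip_check=False)
    #
    # With name_check set the new name is resolved first, and ip_check
    # additionally requires that the resolved IP does not answer a TCP ping
    # (see CheckPrereq above) before the OS rename script is run on the
    # primary node.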
6200

    
6201

    
6202
class LUInstanceRemove(LogicalUnit):
6203
  """Remove an instance.
6204

6205
  """
6206
  HPATH = "instance-remove"
6207
  HTYPE = constants.HTYPE_INSTANCE
6208
  REQ_BGL = False
6209

    
6210
  def ExpandNames(self):
6211
    self._ExpandAndLockInstance()
6212
    self.needed_locks[locking.LEVEL_NODE] = []
6213
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6214

    
6215
  def DeclareLocks(self, level):
6216
    if level == locking.LEVEL_NODE:
6217
      self._LockInstancesNodes()
6218

    
6219
  def BuildHooksEnv(self):
6220
    """Build hooks env.
6221

6222
    This runs on master, primary and secondary nodes of the instance.
6223

6224
    """
6225
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6226
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6227
    return env
6228

    
6229
  def BuildHooksNodes(self):
6230
    """Build hooks nodes.
6231

6232
    """
6233
    nl = [self.cfg.GetMasterNode()]
6234
    nl_post = list(self.instance.all_nodes) + nl
6235
    return (nl, nl_post)
6236

    
6237
  def CheckPrereq(self):
6238
    """Check prerequisites.
6239

6240
    This checks that the instance is in the cluster.
6241

6242
    """
6243
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6244
    assert self.instance is not None, \
6245
      "Cannot retrieve locked instance %s" % self.op.instance_name
6246

    
6247
  def Exec(self, feedback_fn):
6248
    """Remove the instance.
6249

6250
    """
6251
    instance = self.instance
6252
    logging.info("Shutting down instance %s on node %s",
6253
                 instance.name, instance.primary_node)
6254

    
6255
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6256
                                             self.op.shutdown_timeout)
6257
    msg = result.fail_msg
6258
    if msg:
6259
      if self.op.ignore_failures:
6260
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6261
      else:
6262
        raise errors.OpExecError("Could not shutdown instance %s on"
6263
                                 " node %s: %s" %
6264
                                 (instance.name, instance.primary_node, msg))
6265

    
6266
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6267

    
6268

    
6269
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6270
  """Utility function to remove an instance.
6271

6272
  """
6273
  logging.info("Removing block devices for instance %s", instance.name)
6274

    
6275
  if not _RemoveDisks(lu, instance):
6276
    if not ignore_failures:
6277
      raise errors.OpExecError("Can't remove instance's disks")
6278
    feedback_fn("Warning: can't remove instance's disks")
6279

    
6280
  logging.info("Removing instance %s out of cluster config", instance.name)
6281

    
6282
  lu.cfg.RemoveInstance(instance.name)
6283

    
6284
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6285
    "Instance lock removal conflict"
6286

    
6287
  # Remove lock for the instance
6288
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6289

    
6290

    
6291
class LUInstanceQuery(NoHooksLU):
6292
  """Logical unit for querying instances.
6293

6294
  """
6295
  # pylint: disable-msg=W0142
6296
  REQ_BGL = False
6297

    
6298
  def CheckArguments(self):
6299
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6300
                             self.op.output_fields, self.op.use_locking)
6301

    
6302
  def ExpandNames(self):
6303
    self.iq.ExpandNames(self)
6304

    
6305
  def DeclareLocks(self, level):
6306
    self.iq.DeclareLocks(self, level)
6307

    
6308
  def Exec(self, feedback_fn):
6309
    return self.iq.OldStyleQuery(self)
6310

    
6311

    
6312
class LUInstanceFailover(LogicalUnit):
6313
  """Failover an instance.
6314

6315
  """
6316
  HPATH = "instance-failover"
6317
  HTYPE = constants.HTYPE_INSTANCE
6318
  REQ_BGL = False
6319

    
6320
  def CheckArguments(self):
6321
    """Check the arguments.
6322

6323
    """
6324
    self.iallocator = getattr(self.op, "iallocator", None)
6325
    self.target_node = getattr(self.op, "target_node", None)
6326

    
6327
  def ExpandNames(self):
6328
    self._ExpandAndLockInstance()
6329

    
6330
    if self.op.target_node is not None:
6331
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6332

    
6333
    self.needed_locks[locking.LEVEL_NODE] = []
6334
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6335

    
6336
    ignore_consistency = self.op.ignore_consistency
6337
    shutdown_timeout = self.op.shutdown_timeout
6338
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6339
                                       cleanup=False,
6340
                                       failover=True,
6341
                                       ignore_consistency=ignore_consistency,
6342
                                       shutdown_timeout=shutdown_timeout)
6343
    self.tasklets = [self._migrater]
6344

    
6345
  def DeclareLocks(self, level):
6346
    if level == locking.LEVEL_NODE:
6347
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6348
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6349
        if self.op.target_node is None:
6350
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6351
        else:
6352
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6353
                                                   self.op.target_node]
6354
        del self.recalculate_locks[locking.LEVEL_NODE]
6355
      else:
6356
        self._LockInstancesNodes()
6357

    
6358
  def BuildHooksEnv(self):
6359
    """Build hooks env.
6360

6361
    This runs on master, primary and secondary nodes of the instance.
6362

6363
    """
6364
    instance = self._migrater.instance
6365
    source_node = instance.primary_node
6366
    target_node = self.op.target_node
6367
    env = {
6368
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6369
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6370
      "OLD_PRIMARY": source_node,
6371
      "NEW_PRIMARY": target_node,
6372
      }
6373

    
6374
    if instance.disk_template in constants.DTS_INT_MIRROR:
6375
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6376
      env["NEW_SECONDARY"] = source_node
6377
    else:
6378
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6379

    
6380
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6381

    
6382
    return env
6383

    
6384
  def BuildHooksNodes(self):
6385
    """Build hooks nodes.
6386

6387
    """
6388
    instance = self._migrater.instance
6389
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6390
    return (nl, nl + [instance.primary_node])
6391

    
6392

    
6393
class LUInstanceMigrate(LogicalUnit):
6394
  """Migrate an instance.
6395

6396
  This is migration without shutting down, compared to the failover,
6397
  which is done with shutdown.
6398

6399
  """
6400
  HPATH = "instance-migrate"
6401
  HTYPE = constants.HTYPE_INSTANCE
6402
  REQ_BGL = False
6403

    
6404
  def ExpandNames(self):
6405
    self._ExpandAndLockInstance()
6406

    
6407
    if self.op.target_node is not None:
6408
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6409

    
6410
    self.needed_locks[locking.LEVEL_NODE] = []
6411
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6412

    
6413
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6414
                                       cleanup=self.op.cleanup,
6415
                                       failover=False,
6416
                                       fallback=self.op.allow_failover)
6417
    self.tasklets = [self._migrater]
6418

    
6419
  def DeclareLocks(self, level):
6420
    if level == locking.LEVEL_NODE:
6421
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6422
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6423
        if self.op.target_node is None:
6424
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6425
        else:
6426
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6427
                                                   self.op.target_node]
6428
        del self.recalculate_locks[locking.LEVEL_NODE]
6429
      else:
6430
        self._LockInstancesNodes()
6431

    
6432
  def BuildHooksEnv(self):
6433
    """Build hooks env.
6434

6435
    This runs on master, primary and secondary nodes of the instance.
6436

6437
    """
6438
    instance = self._migrater.instance
6439
    source_node = instance.primary_node
6440
    target_node = self.op.target_node
6441
    env = _BuildInstanceHookEnvByObject(self, instance)
6442
    env.update({
6443
      "MIGRATE_LIVE": self._migrater.live,
6444
      "MIGRATE_CLEANUP": self.op.cleanup,
6445
      "OLD_PRIMARY": source_node,
6446
      "NEW_PRIMARY": target_node,
6447
      })
6448

    
6449
    if instance.disk_template in constants.DTS_INT_MIRROR:
6450
      env["OLD_SECONDARY"] = target_node
6451
      env["NEW_SECONDARY"] = source_node
6452
    else:
6453
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6454

    
6455
    return env
6456

    
6457
  def BuildHooksNodes(self):
6458
    """Build hooks nodes.
6459

6460
    """
6461
    instance = self._migrater.instance
6462
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6463
    return (nl, nl + [instance.primary_node])
6464

    
6465

    
6466
class LUInstanceMove(LogicalUnit):
6467
  """Move an instance by data-copying.
6468

6469
  """
6470
  HPATH = "instance-move"
6471
  HTYPE = constants.HTYPE_INSTANCE
6472
  REQ_BGL = False
6473

    
6474
  def ExpandNames(self):
6475
    self._ExpandAndLockInstance()
6476
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6477
    self.op.target_node = target_node
6478
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6479
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6480

    
6481
  def DeclareLocks(self, level):
6482
    if level == locking.LEVEL_NODE:
6483
      self._LockInstancesNodes(primary_only=True)
6484

    
6485
  def BuildHooksEnv(self):
6486
    """Build hooks env.
6487

6488
    This runs on master, primary and secondary nodes of the instance.
6489

6490
    """
6491
    env = {
6492
      "TARGET_NODE": self.op.target_node,
6493
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6494
      }
6495
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6496
    return env
6497

    
6498
  def BuildHooksNodes(self):
6499
    """Build hooks nodes.
6500

6501
    """
6502
    nl = [
6503
      self.cfg.GetMasterNode(),
6504
      self.instance.primary_node,
6505
      self.op.target_node,
6506
      ]
6507
    return (nl, nl)
6508

    
6509
  def CheckPrereq(self):
6510
    """Check prerequisites.
6511

6512
    This checks that the instance is in the cluster.
6513

6514
    """
6515
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6516
    assert self.instance is not None, \
6517
      "Cannot retrieve locked instance %s" % self.op.instance_name
6518

    
6519
    node = self.cfg.GetNodeInfo(self.op.target_node)
6520
    assert node is not None, \
6521
      "Cannot retrieve locked node %s" % self.op.target_node
6522

    
6523
    self.target_node = target_node = node.name
6524

    
6525
    if target_node == instance.primary_node:
6526
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6527
                                 (instance.name, target_node),
6528
                                 errors.ECODE_STATE)
6529

    
6530
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6531

    
6532
    for idx, dsk in enumerate(instance.disks):
6533
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6534
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6535
                                   " cannot copy" % idx, errors.ECODE_STATE)
6536

    
6537
    _CheckNodeOnline(self, target_node)
6538
    _CheckNodeNotDrained(self, target_node)
6539
    _CheckNodeVmCapable(self, target_node)
6540

    
6541
    if instance.admin_up:
6542
      # check memory requirements on the target node
6543
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6544
                           instance.name, bep[constants.BE_MEMORY],
6545
                           instance.hypervisor)
6546
    else:
6547
      self.LogInfo("Not checking memory on the secondary node as"
6548
                   " instance will not be started")
6549

    
6550
    # check bridge existence
6551
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6552

    
6553
  def Exec(self, feedback_fn):
6554
    """Move an instance.
6555

6556
    The move is done by shutting it down on its present node, copying
6557
    the data over (slow) and starting it on the new node.
6558

6559
    """
6560
    instance = self.instance
6561

    
6562
    source_node = instance.primary_node
6563
    target_node = self.target_node
6564

    
6565
    self.LogInfo("Shutting down instance %s on source node %s",
6566
                 instance.name, source_node)
6567

    
6568
    result = self.rpc.call_instance_shutdown(source_node, instance,
6569
                                             self.op.shutdown_timeout)
6570
    msg = result.fail_msg
6571
    if msg:
6572
      if self.op.ignore_consistency:
6573
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6574
                             " Proceeding anyway. Please make sure node"
6575
                             " %s is down. Error details: %s",
6576
                             instance.name, source_node, source_node, msg)
6577
      else:
6578
        raise errors.OpExecError("Could not shutdown instance %s on"
6579
                                 " node %s: %s" %
6580
                                 (instance.name, source_node, msg))
6581

    
6582
    # create the target disks
6583
    try:
6584
      _CreateDisks(self, instance, target_node=target_node)
6585
    except errors.OpExecError:
6586
      self.LogWarning("Device creation failed, reverting...")
6587
      try:
6588
        _RemoveDisks(self, instance, target_node=target_node)
6589
      finally:
6590
        self.cfg.ReleaseDRBDMinors(instance.name)
6591
        raise
6592

    
6593
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6594

    
6595
    errs = []
6596
    # activate, get path, copy the data over
6597
    for idx, disk in enumerate(instance.disks):
6598
      self.LogInfo("Copying data for disk %d", idx)
6599
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6600
                                               instance.name, True, idx)
6601
      if result.fail_msg:
6602
        self.LogWarning("Can't assemble newly created disk %d: %s",
6603
                        idx, result.fail_msg)
6604
        errs.append(result.fail_msg)
6605
        break
6606
      dev_path = result.payload
6607
      result = self.rpc.call_blockdev_export(source_node, disk,
6608
                                             target_node, dev_path,
6609
                                             cluster_name)
6610
      if result.fail_msg:
6611
        self.LogWarning("Can't copy data over for disk %d: %s",
6612
                        idx, result.fail_msg)
6613
        errs.append(result.fail_msg)
6614
        break
6615

    
6616
    if errs:
6617
      self.LogWarning("Some disks failed to copy, aborting")
6618
      try:
6619
        _RemoveDisks(self, instance, target_node=target_node)
6620
      finally:
6621
        self.cfg.ReleaseDRBDMinors(instance.name)
6622
        raise errors.OpExecError("Errors during disk copy: %s" %
6623
                                 (",".join(errs),))
6624

    
6625
    instance.primary_node = target_node
6626
    self.cfg.Update(instance, feedback_fn)
6627

    
6628
    self.LogInfo("Removing the disks on the original node")
6629
    _RemoveDisks(self, instance, target_node=source_node)
6630

    
6631
    # Only start the instance if it's marked as up
6632
    if instance.admin_up:
6633
      self.LogInfo("Starting instance %s on node %s",
6634
                   instance.name, target_node)
6635

    
6636
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6637
                                           ignore_secondaries=True)
6638
      if not disks_ok:
6639
        _ShutdownInstanceDisks(self, instance)
6640
        raise errors.OpExecError("Can't activate the instance's disks")
6641

    
6642
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6643
      msg = result.fail_msg
6644
      if msg:
6645
        _ShutdownInstanceDisks(self, instance)
6646
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6647
                                 (instance.name, target_node, msg))
6648

    
6649

    
6650
class LUNodeMigrate(LogicalUnit):
6651
  """Migrate all instances from a node.
6652

6653
  """
6654
  HPATH = "node-migrate"
6655
  HTYPE = constants.HTYPE_NODE
6656
  REQ_BGL = False
6657

    
6658
  def CheckArguments(self):
6659
    pass
6660

    
6661
  def ExpandNames(self):
6662
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6663

    
6664
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6665
    self.needed_locks = {
6666
      locking.LEVEL_NODE: [self.op.node_name],
6667
      }
6668

    
6669
  def BuildHooksEnv(self):
6670
    """Build hooks env.
6671

6672
    This runs on the master, the primary and all the secondaries.
6673

6674
    """
6675
    return {
6676
      "NODE_NAME": self.op.node_name,
6677
      }
6678

    
6679
  def BuildHooksNodes(self):
6680
    """Build hooks nodes.
6681

6682
    """
6683
    nl = [self.cfg.GetMasterNode()]
6684
    return (nl, nl)
6685

    
6686
  def CheckPrereq(self):
6687
    pass
6688

    
6689
  def Exec(self, feedback_fn):
6690
    # Prepare jobs for migration instances
6691
    jobs = [
6692
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6693
                                 mode=self.op.mode,
6694
                                 live=self.op.live,
6695
                                 iallocator=self.op.iallocator,
6696
                                 target_node=self.op.target_node)]
6697
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6698
      ]
6699

    
6700
    # TODO: Run iallocator in this opcode and pass correct placement options to
6701
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6702
    # running the iallocator and the actual migration, a good consistency model
6703
    # will have to be found.
6704

    
6705
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6706
            frozenset([self.op.node_name]))
6707

    
6708
    return ResultWithJobs(jobs)
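
    # Shape of the result (instance names are hypothetical): for a node with
    # primary instances "inst1" and "inst2" the list built above is
    #
    #   [[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
    #    [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]]
    #
    # i.e. one single-opcode job per primary instance, returned via
    # ResultWithJobs.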
6709

    
6710

    
6711
class TLMigrateInstance(Tasklet):
6712
  """Tasklet class for instance migration.
6713

6714
  @type live: boolean
6715
  @ivar live: whether the migration will be done live or non-live;
6716
      this variable is initialized only after CheckPrereq has run
6717
  @type cleanup: boolean
6718
  @ivar cleanup: Whether we are cleaning up after a failed migration
6719
  @type iallocator: string
6720
  @ivar iallocator: The iallocator used to determine target_node
6721
  @type target_node: string
6722
  @ivar target_node: If given, the target_node to reallocate the instance to
6723
  @type failover: boolean
6724
  @ivar failover: Whether operation results in failover or migration
6725
  @type fallback: boolean
6726
  @ivar fallback: Whether fallback to failover is allowed if migration is not
6727
                  possible
6728
  @type ignore_consistency: boolean
6729
  @ivar ignore_consistency: Whether we should ignore consistency between source
6730
                            and target node
6731
  @type shutdown_timeout: int
6732
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
6733

6734
  """
6735
  def __init__(self, lu, instance_name, cleanup=False,
6736
               failover=False, fallback=False,
6737
               ignore_consistency=False,
6738
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6739
    """Initializes this class.
6740

6741
    """
6742
    Tasklet.__init__(self, lu)
6743

    
6744
    # Parameters
6745
    self.instance_name = instance_name
6746
    self.cleanup = cleanup
6747
    self.live = False # will be overridden later
6748
    self.failover = failover
6749
    self.fallback = fallback
6750
    self.ignore_consistency = ignore_consistency
6751
    self.shutdown_timeout = shutdown_timeout
6752

    
6753
  def CheckPrereq(self):
6754
    """Check prerequisites.
6755

6756
    This checks that the instance is in the cluster.
6757

6758
    """
6759
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6760
    instance = self.cfg.GetInstanceInfo(instance_name)
6761
    assert instance is not None
6762
    self.instance = instance
6763

    
6764
    if (not self.cleanup and not instance.admin_up and not self.failover and
6765
        self.fallback):
6766
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6767
                      " to failover")
6768
      self.failover = True
6769

    
6770
    if instance.disk_template not in constants.DTS_MIRRORED:
6771
      if self.failover:
6772
        text = "failovers"
6773
      else:
6774
        text = "migrations"
6775
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6776
                                 " %s" % (instance.disk_template, text),
6777
                                 errors.ECODE_STATE)
6778

    
6779
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6780
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6781

    
6782
      if self.lu.op.iallocator:
6783
        self._RunAllocator()
6784
      else:
6785
        # We set self.target_node as it is required by
6786
        # BuildHooksEnv
6787
        self.target_node = self.lu.op.target_node
6788

    
6789
      # self.target_node is already populated, either directly or by the
6790
      # iallocator run
6791
      target_node = self.target_node
6792
      if self.target_node == instance.primary_node:
6793
        raise errors.OpPrereqError("Cannot migrate instance %s"
6794
                                   " to its primary (%s)" %
6795
                                   (instance.name, instance.primary_node))
6796

    
6797
      if len(self.lu.tasklets) == 1:
6798
        # It is safe to release locks only when we're the only tasklet
6799
        # in the LU
6800
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6801
                      keep=[instance.primary_node, self.target_node])
6802

    
6803
    else:
6804
      secondary_nodes = instance.secondary_nodes
6805
      if not secondary_nodes:
6806
        raise errors.ConfigurationError("No secondary node but using"
6807
                                        " %s disk template" %
6808
                                        instance.disk_template)
6809
      target_node = secondary_nodes[0]
6810
      if self.lu.op.iallocator or (self.lu.op.target_node and
6811
                                   self.lu.op.target_node != target_node):
6812
        if self.failover:
6813
          text = "failed over"
6814
        else:
6815
          text = "migrated"
6816
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6817
                                   " be %s to arbitrary nodes"
6818
                                   " (neither an iallocator nor a target"
6819
                                   " node can be passed)" %
6820
                                   (instance.disk_template, text),
6821
                                   errors.ECODE_INVAL)
6822

    
6823
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6824

    
6825
    # check memory requirements on the secondary node
6826
    if not self.failover or instance.admin_up:
6827
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6828
                           instance.name, i_be[constants.BE_MEMORY],
6829
                           instance.hypervisor)
6830
    else:
6831
      self.lu.LogInfo("Not checking memory on the secondary node as"
6832
                      " instance will not be started")
6833

    
6834
    # check bridge existence
6835
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6836

    
6837
    if not self.cleanup:
6838
      _CheckNodeNotDrained(self.lu, target_node)
6839
      if not self.failover:
6840
        result = self.rpc.call_instance_migratable(instance.primary_node,
6841
                                                   instance)
6842
        if result.fail_msg and self.fallback:
6843
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6844
                          " failover")
6845
          self.failover = True
6846
        else:
6847
          result.Raise("Can't migrate, please use failover",
6848
                       prereq=True, ecode=errors.ECODE_STATE)
6849

    
6850
    assert not (self.failover and self.cleanup)
6851

    
6852
    if not self.failover:
6853
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6854
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6855
                                   " parameters are accepted",
6856
                                   errors.ECODE_INVAL)
6857
      if self.lu.op.live is not None:
6858
        if self.lu.op.live:
6859
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6860
        else:
6861
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6862
        # reset the 'live' parameter to None so that repeated
6863
        # invocations of CheckPrereq do not raise an exception
6864
        self.lu.op.live = None
6865
      elif self.lu.op.mode is None:
6866
        # read the default value from the hypervisor
6867
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6868
                                                skip_globals=False)
6869
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6870

    
6871
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6872
    else:
6873
      # Failover is never live
6874
      self.live = False
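
    # Summary of the live/mode resolution above (no new behaviour):
    #   op.live is True         -> op.mode = constants.HT_MIGRATION_LIVE
    #   op.live is False        -> op.mode = constants.HT_MIGRATION_NONLIVE
    #   both live and mode set  -> rejected with OpPrereqError
    #   neither set             -> mode comes from the hypervisor's
    #                              HV_MIGRATION_MODE default
    # and a failover is always treated as non-live.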
6875

    
6876
  def _RunAllocator(self):
6877
    """Run the allocator based on input opcode.
6878

6879
    """
6880
    ial = IAllocator(self.cfg, self.rpc,
6881
                     mode=constants.IALLOCATOR_MODE_RELOC,
6882
                     name=self.instance_name,
6883
                     # TODO See why hail breaks with a single node below
6884
                     relocate_from=[self.instance.primary_node,
6885
                                    self.instance.primary_node],
6886
                     )
6887

    
6888
    ial.Run(self.lu.op.iallocator)
6889

    
6890
    if not ial.success:
6891
      raise errors.OpPrereqError("Can't compute nodes using"
6892
                                 " iallocator '%s': %s" %
6893
                                 (self.lu.op.iallocator, ial.info),
6894
                                 errors.ECODE_NORES)
6895
    if len(ial.result) != ial.required_nodes:
6896
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6897
                                 " of nodes (%s), required %s" %
6898
                                 (self.lu.op.iallocator, len(ial.result),
6899
                                  ial.required_nodes), errors.ECODE_FAULT)
6900
    self.target_node = ial.result[0]
6901
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6902
                    self.instance_name, self.lu.op.iallocator,
6903
                    utils.CommaJoin(ial.result))
6904

    
6905
  def _WaitUntilSync(self):
6906
    """Poll with custom rpc for disk sync.
6907

6908
    This uses our own step-based rpc call.
6909

6910
    """
6911
    self.feedback_fn("* wait until resync is done")
6912
    all_done = False
6913
    while not all_done:
6914
      all_done = True
6915
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6916
                                            self.nodes_ip,
6917
                                            self.instance.disks)
6918
      min_percent = 100
6919
      for node, nres in result.items():
6920
        nres.Raise("Cannot resync disks on node %s" % node)
6921
        node_done, node_percent = nres.payload
6922
        all_done = all_done and node_done
6923
        if node_percent is not None:
6924
          min_percent = min(min_percent, node_percent)
6925
      if not all_done:
6926
        if min_percent < 100:
6927
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6928
        time.sleep(2)
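
    # Payload shape assumed by the loop above: each node returns a
    # (done, sync_percent) pair, so e.g. (False, 87.5) keeps the loop polling
    # and reports "- progress: 87.5%", while the loop only finishes once
    # every node has reported done=True.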
6929

    
6930
  def _EnsureSecondary(self, node):
6931
    """Demote a node to secondary.
6932

6933
    """
6934
    self.feedback_fn("* switching node %s to secondary mode" % node)
6935

    
6936
    for dev in self.instance.disks:
6937
      self.cfg.SetDiskID(dev, node)
6938

    
6939
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6940
                                          self.instance.disks)
6941
    result.Raise("Cannot change disk to secondary on node %s" % node)
6942

    
6943
  def _GoStandalone(self):
6944
    """Disconnect from the network.
6945

6946
    """
6947
    self.feedback_fn("* changing into standalone mode")
6948
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6949
                                               self.instance.disks)
6950
    for node, nres in result.items():
6951
      nres.Raise("Cannot disconnect disks node %s" % node)
6952

    
6953
  def _GoReconnect(self, multimaster):
6954
    """Reconnect to the network.
6955

6956
    """
6957
    if multimaster:
6958
      msg = "dual-master"
6959
    else:
6960
      msg = "single-master"
6961
    self.feedback_fn("* changing disks into %s mode" % msg)
6962
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6963
                                           self.instance.disks,
6964
                                           self.instance.name, multimaster)
6965
    for node, nres in result.items():
6966
      nres.Raise("Cannot change disks config on node %s" % node)
6967

    
6968
  def _ExecCleanup(self):
6969
    """Try to cleanup after a failed migration.
6970

6971
    The cleanup is done by:
6972
      - check that the instance is running only on one node
6973
        (and update the config if needed)
6974
      - change disks on its secondary node to secondary
6975
      - wait until disks are fully synchronized
6976
      - disconnect from the network
6977
      - change disks into single-master mode
6978
      - wait again until disks are fully synchronized
6979

6980
    """
6981
    instance = self.instance
6982
    target_node = self.target_node
6983
    source_node = self.source_node
6984

    
6985
    # check running on only one node
6986
    self.feedback_fn("* checking where the instance actually runs"
6987
                     " (if this hangs, the hypervisor might be in"
6988
                     " a bad state)")
6989
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6990
    for node, result in ins_l.items():
6991
      result.Raise("Can't contact node %s" % node)
6992

    
6993
    runningon_source = instance.name in ins_l[source_node].payload
6994
    runningon_target = instance.name in ins_l[target_node].payload
6995

    
6996
    if runningon_source and runningon_target:
6997
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6998
                               " or the hypervisor is confused; you will have"
6999
                               " to ensure manually that it runs only on one"
7000
                               " and restart this operation")
7001

    
7002
    if not (runningon_source or runningon_target):
7003
      raise errors.OpExecError("Instance does not seem to be running at all;"
7004
                               " in this case it's safer to repair by"
7005
                               " running 'gnt-instance stop' to ensure disk"
7006
                               " shutdown, and then restarting it")
7007

    
7008
    if runningon_target:
7009
      # the migration has actually succeeded, we need to update the config
7010
      self.feedback_fn("* instance running on secondary node (%s),"
7011
                       " updating config" % target_node)
7012
      instance.primary_node = target_node
7013
      self.cfg.Update(instance, self.feedback_fn)
7014
      demoted_node = source_node
7015
    else:
7016
      self.feedback_fn("* instance confirmed to be running on its"
7017
                       " primary node (%s)" % source_node)
7018
      demoted_node = target_node
7019

    
7020
    if instance.disk_template in constants.DTS_INT_MIRROR:
7021
      self._EnsureSecondary(demoted_node)
7022
      try:
7023
        self._WaitUntilSync()
7024
      except errors.OpExecError:
7025
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
7027
        pass
7028
      self._GoStandalone()
7029
      self._GoReconnect(False)
7030
      self._WaitUntilSync()
7031

    
7032
    self.feedback_fn("* done")
7033

    
7034
  def _RevertDiskStatus(self):
7035
    """Try to revert the disk status after a failed migration.
7036

7037
    """
7038
    target_node = self.target_node
7039
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7040
      return
7041

    
7042
    try:
7043
      self._EnsureSecondary(target_node)
7044
      self._GoStandalone()
7045
      self._GoReconnect(False)
7046
      self._WaitUntilSync()
7047
    except errors.OpExecError, err:
7048
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7049
                         " please try to recover the instance manually;"
7050
                         " error '%s'" % str(err))
7051

    
7052
  def _AbortMigration(self):
7053
    """Call the hypervisor code to abort a started migration.
7054

7055
    """
7056
    instance = self.instance
7057
    target_node = self.target_node
7058
    migration_info = self.migration_info
7059

    
7060
    abort_result = self.rpc.call_finalize_migration(target_node,
7061
                                                    instance,
7062
                                                    migration_info,
7063
                                                    False)
7064
    abort_msg = abort_result.fail_msg
7065
    if abort_msg:
7066
      logging.error("Aborting migration failed on target node %s: %s",
7067
                    target_node, abort_msg)
7068
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7070

    
7071
  def _ExecMigration(self):
7072
    """Migrate an instance.
7073

7074
    The migrate is done by:
7075
      - change the disks into dual-master mode
7076
      - wait until disks are fully synchronized again
7077
      - migrate the instance
7078
      - change disks on the new secondary node (the old primary) to secondary
7079
      - wait until disks are fully synchronized
7080
      - change disks into single-master mode
7081

7082
    """
7083
    instance = self.instance
7084
    target_node = self.target_node
7085
    source_node = self.source_node
7086

    
7087
    self.feedback_fn("* checking disk consistency between source and target")
7088
    for dev in instance.disks:
7089
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7090
        raise errors.OpExecError("Disk %s is degraded or not fully"
7091
                                 " synchronized on target node,"
7092
                                 " aborting migration" % dev.iv_name)
7093

    
7094
    # First get the migration information from the remote node
7095
    result = self.rpc.call_migration_info(source_node, instance)
7096
    msg = result.fail_msg
7097
    if msg:
7098
      log_err = ("Failed fetching source migration information from %s: %s" %
7099
                 (source_node, msg))
7100
      logging.error(log_err)
7101
      raise errors.OpExecError(log_err)
7102

    
7103
    self.migration_info = migration_info = result.payload
7104

    
7105
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7106
      # Then switch the disks to master/master mode
7107
      self._EnsureSecondary(target_node)
7108
      self._GoStandalone()
7109
      self._GoReconnect(True)
7110
      self._WaitUntilSync()
7111

    
7112
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7113
    result = self.rpc.call_accept_instance(target_node,
7114
                                           instance,
7115
                                           migration_info,
7116
                                           self.nodes_ip[target_node])
7117

    
7118
    msg = result.fail_msg
7119
    if msg:
7120
      logging.error("Instance pre-migration failed, trying to revert"
7121
                    " disk status: %s", msg)
7122
      self.feedback_fn("Pre-migration failed, aborting")
7123
      self._AbortMigration()
7124
      self._RevertDiskStatus()
7125
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7126
                               (instance.name, msg))
7127

    
7128
    self.feedback_fn("* migrating instance to %s" % target_node)
7129
    result = self.rpc.call_instance_migrate(source_node, instance,
7130
                                            self.nodes_ip[target_node],
7131
                                            self.live)
7132
    msg = result.fail_msg
7133
    if msg:
7134
      logging.error("Instance migration failed, trying to revert"
7135
                    " disk status: %s", msg)
7136
      self.feedback_fn("Migration failed, aborting")
7137
      self._AbortMigration()
7138
      self._RevertDiskStatus()
7139
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7140
                               (instance.name, msg))
7141

    
7142
    instance.primary_node = target_node
7143
    # distribute new instance config to the other nodes
7144
    self.cfg.Update(instance, self.feedback_fn)
7145

    
7146
    result = self.rpc.call_finalize_migration(target_node,
7147
                                              instance,
7148
                                              migration_info,
7149
                                              True)
7150
    msg = result.fail_msg
7151
    if msg:
7152
      logging.error("Instance migration succeeded, but finalization failed:"
7153
                    " %s", msg)
7154
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7155
                               msg)
7156

    
7157
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7158
      self._EnsureSecondary(source_node)
7159
      self._WaitUntilSync()
7160
      self._GoStandalone()
7161
      self._GoReconnect(False)
7162
      self._WaitUntilSync()
7163

    
7164
    self.feedback_fn("* done")
7165

    
7166
  def _ExecFailover(self):
7167
    """Failover an instance.
7168

7169
    The failover is done by shutting it down on its present node and
7170
    starting it on the secondary.
7171

7172
    """
7173
    instance = self.instance
7174
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7175

    
7176
    source_node = instance.primary_node
7177
    target_node = self.target_node
7178

    
7179
    if instance.admin_up:
7180
      self.feedback_fn("* checking disk consistency between source and target")
7181
      for dev in instance.disks:
7182
        # for drbd, these are drbd over lvm
7183
        if not _CheckDiskConsistency(self, dev, target_node, False):
7184
          if not self.ignore_consistency:
7185
            raise errors.OpExecError("Disk %s is degraded on target node,"
7186
                                     " aborting failover" % dev.iv_name)
7187
    else:
7188
      self.feedback_fn("* not checking disk consistency as instance is not"
7189
                       " running")
7190

    
7191
    self.feedback_fn("* shutting down instance on source node")
7192
    logging.info("Shutting down instance %s on node %s",
7193
                 instance.name, source_node)
7194

    
7195
    result = self.rpc.call_instance_shutdown(source_node, instance,
7196
                                             self.shutdown_timeout)
7197
    msg = result.fail_msg
7198
    if msg:
7199
      if self.ignore_consistency or primary_node.offline:
7200
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7201
                           " proceeding anyway; please make sure node"
7202
                           " %s is down; error details: %s",
7203
                           instance.name, source_node, source_node, msg)
7204
      else:
7205
        raise errors.OpExecError("Could not shutdown instance %s on"
7206
                                 " node %s: %s" %
7207
                                 (instance.name, source_node, msg))
7208

    
7209
    self.feedback_fn("* deactivating the instance's disks on source node")
7210
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7211
      raise errors.OpExecError("Can't shut down the instance's disks.")
7212

    
7213
    instance.primary_node = target_node
7214
    # distribute new instance config to the other nodes
7215
    self.cfg.Update(instance, self.feedback_fn)
7216

    
7217
    # Only start the instance if it's marked as up
7218
    if instance.admin_up:
7219
      self.feedback_fn("* activating the instance's disks on target node")
7220
      logging.info("Starting instance %s on node %s",
7221
                   instance.name, target_node)
7222

    
7223
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7224
                                           ignore_secondaries=True)
7225
      if not disks_ok:
7226
        _ShutdownInstanceDisks(self, instance)
7227
        raise errors.OpExecError("Can't activate the instance's disks")
7228

    
7229
      self.feedback_fn("* starting the instance on the target node")
7230
      result = self.rpc.call_instance_start(target_node, instance, None, None)
7231
      msg = result.fail_msg
7232
      if msg:
7233
        _ShutdownInstanceDisks(self, instance)
7234
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7235
                                 (instance.name, target_node, msg))
7236

    
7237
  def Exec(self, feedback_fn):
7238
    """Perform the migration.
7239

7240
    """
7241
    self.feedback_fn = feedback_fn
7242
    self.source_node = self.instance.primary_node
7243

    
7244
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7245
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7246
      self.target_node = self.instance.secondary_nodes[0]
7247
      # Otherwise self.target_node has been populated either
7248
      # directly, or through an iallocator.
7249

    
7250
    self.all_nodes = [self.source_node, self.target_node]
7251
    self.nodes_ip = {
7252
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7253
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7254
      }
7255

    
7256
    if self.failover:
7257
      feedback_fn("Failover instance %s" % self.instance.name)
7258
      self._ExecFailover()
7259
    else:
7260
      feedback_fn("Migrating instance %s" % self.instance.name)
7261

    
7262
      if self.cleanup:
7263
        return self._ExecCleanup()
7264
      else:
7265
        return self._ExecMigration()
7266

    
7267

    
7268
def _CreateBlockDev(lu, node, instance, device, force_create,
7269
                    info, force_open):
7270
  """Create a tree of block devices on a given node.
7271

7272
  If this device type has to be created on secondaries, create it and
7273
  all its children.
7274

7275
  If not, just recurse to children keeping the same 'force' value.
7276

7277
  @param lu: the lu on whose behalf we execute
7278
  @param node: the node on which to create the device
7279
  @type instance: L{objects.Instance}
7280
  @param instance: the instance which owns the device
7281
  @type device: L{objects.Disk}
7282
  @param device: the device to create
7283
  @type force_create: boolean
7284
  @param force_create: whether to force creation of this device; this
7285
      will be change to True whenever we find a device which has
7286
      CreateOnSecondary() attribute
7287
  @param info: the extra 'metadata' we should attach to the device
7288
      (this will be represented as a LVM tag)
7289
  @type force_open: boolean
7290
  @param force_open: this parameter will be passes to the
7291
      L{backend.BlockdevCreate} function where it specifies
7292
      whether we run on primary or not, and it affects both
7293
      the child assembly and the device own Open() execution
7294

7295
  """
7296
  if device.CreateOnSecondary():
7297
    force_create = True
7298

    
7299
  if device.children:
7300
    for child in device.children:
7301
      _CreateBlockDev(lu, node, instance, child, force_create,
7302
                      info, force_open)
7303

    
7304
  if not force_create:
7305
    return
7306

    
7307
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
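# Example of the recursion above (an illustrative sketch, not new behaviour):
# for a DRBD8 device with two LV children, Disk.CreateOnSecondary() is
# expected to return True, so force_create is flipped to True and the two
# LVs are created before the DRBD device itself; for a plain LV disk the
# caller (_CreateDisks) already passes force_create=True on the primary
# node, and only that single device is created there.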
7308

    
7309

    
7310
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7311
  """Create a single block device on a given node.
7312

7313
  This will not recurse over children of the device, so they must be
7314
  created in advance.
7315

7316
  @param lu: the lu on whose behalf we execute
7317
  @param node: the node on which to create the device
7318
  @type instance: L{objects.Instance}
7319
  @param instance: the instance which owns the device
7320
  @type device: L{objects.Disk}
7321
  @param device: the device to create
7322
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7329

7330
  """
7331
  lu.cfg.SetDiskID(device, node)
7332
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7333
                                       instance.name, force_open, info)
7334
  result.Raise("Can't create block device %s on"
7335
               " node %s for instance %s" % (device, node, instance.name))
7336
  if device.physical_id is None:
7337
    device.physical_id = result.payload
7338

    
7339

    
7340
def _GenerateUniqueNames(lu, exts):
7341
  """Generate a suitable LV name.
7342

7343
  This will generate a logical volume name for the given instance.
7344

7345
  """
7346
  results = []
7347
  for val in exts:
7348
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7349
    results.append("%s%s" % (new_id, val))
7350
  return results
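# Hypothetical output: _GenerateUniqueNames(lu, [".disk0"]) could return
# something like ["0d1f9a6c-....disk0"], i.e. a cluster-wide unique ID
# (typically a UUID) followed by the requested extension.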
7351

    
7352

    
7353
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7354
                         iv_name, p_minor, s_minor):
7355
  """Generate a drbd8 device complete with its children.
7356

7357
  """
7358
  assert len(vgnames) == len(names) == 2
7359
  port = lu.cfg.AllocatePort()
7360
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7361
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7362
                          logical_id=(vgnames[0], names[0]))
7363
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7364
                          logical_id=(vgnames[1], names[1]))
7365
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7366
                          logical_id=(primary, secondary, port,
7367
                                      p_minor, s_minor,
7368
                                      shared_secret),
7369
                          children=[dev_data, dev_meta],
7370
                          iv_name=iv_name)
7371
  return drbd_dev
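# Sketch of the resulting device tree (illustrative values only):
#
#   DRBD8  size=size  logical_id=(primary, secondary, port,
#                                 p_minor, s_minor, shared_secret)
#     +- LV data  size=size  logical_id=(vgnames[0], names[0])
#     +- LV meta  size=128   logical_id=(vgnames[1], names[1])
#
# The 128 MiB metadata volume is hard-coded above; the port and shared
# secret are allocated from the cluster configuration, while the minors are
# supplied by the caller (_GenerateDiskTemplate).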
7372

    
7373

    
7374
def _GenerateDiskTemplate(lu, template_name,
7375
                          instance_name, primary_node,
7376
                          secondary_nodes, disk_info,
7377
                          file_storage_dir, file_driver,
7378
                          base_index, feedback_fn):
7379
  """Generate the entire disk layout for a given template type.
7380

7381
  """
7382
  #TODO: compute space requirements
7383

    
7384
  vgname = lu.cfg.GetVGName()
7385
  disk_count = len(disk_info)
7386
  disks = []
7387
  if template_name == constants.DT_DISKLESS:
7388
    pass
7389
  elif template_name == constants.DT_PLAIN:
7390
    if len(secondary_nodes) != 0:
7391
      raise errors.ProgrammerError("Wrong template configuration")
7392

    
7393
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7394
                                      for i in range(disk_count)])
7395
    for idx, disk in enumerate(disk_info):
7396
      disk_index = idx + base_index
7397
      vg = disk.get(constants.IDISK_VG, vgname)
7398
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7399
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7400
                              size=disk[constants.IDISK_SIZE],
7401
                              logical_id=(vg, names[idx]),
7402
                              iv_name="disk/%d" % disk_index,
7403
                              mode=disk[constants.IDISK_MODE])
7404
      disks.append(disk_dev)
7405
  elif template_name == constants.DT_DRBD8:
7406
    if len(secondary_nodes) != 1:
7407
      raise errors.ProgrammerError("Wrong template configuration")
7408
    remote_node = secondary_nodes[0]
7409
    minors = lu.cfg.AllocateDRBDMinor(
7410
      [primary_node, remote_node] * len(disk_info), instance_name)
7411

    
7412
    names = []
7413
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7414
                                               for i in range(disk_count)]):
7415
      names.append(lv_prefix + "_data")
7416
      names.append(lv_prefix + "_meta")
7417
    for idx, disk in enumerate(disk_info):
7418
      disk_index = idx + base_index
7419
      data_vg = disk.get(constants.IDISK_VG, vgname)
7420
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7421
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7422
                                      disk[constants.IDISK_SIZE],
7423
                                      [data_vg, meta_vg],
7424
                                      names[idx * 2:idx * 2 + 2],
7425
                                      "disk/%d" % disk_index,
7426
                                      minors[idx * 2], minors[idx * 2 + 1])
7427
      disk_dev.mode = disk[constants.IDISK_MODE]
7428
      disks.append(disk_dev)
7429
  elif template_name == constants.DT_FILE:
7430
    if len(secondary_nodes) != 0:
7431
      raise errors.ProgrammerError("Wrong template configuration")
7432

    
7433
    opcodes.RequireFileStorage()
7434

    
7435
    for idx, disk in enumerate(disk_info):
7436
      disk_index = idx + base_index
7437
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7438
                              size=disk[constants.IDISK_SIZE],
7439
                              iv_name="disk/%d" % disk_index,
7440
                              logical_id=(file_driver,
7441
                                          "%s/disk%d" % (file_storage_dir,
7442
                                                         disk_index)),
7443
                              mode=disk[constants.IDISK_MODE])
7444
      disks.append(disk_dev)
7445
  elif template_name == constants.DT_SHARED_FILE:
7446
    if len(secondary_nodes) != 0:
7447
      raise errors.ProgrammerError("Wrong template configuration")
7448

    
7449
    opcodes.RequireSharedFileStorage()
7450

    
7451
    for idx, disk in enumerate(disk_info):
7452
      disk_index = idx + base_index
7453
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7454
                              size=disk[constants.IDISK_SIZE],
7455
                              iv_name="disk/%d" % disk_index,
7456
                              logical_id=(file_driver,
7457
                                          "%s/disk%d" % (file_storage_dir,
7458
                                                         disk_index)),
7459
                              mode=disk[constants.IDISK_MODE])
7460
      disks.append(disk_dev)
7461
  elif template_name == constants.DT_BLOCK:
7462
    if len(secondary_nodes) != 0:
7463
      raise errors.ProgrammerError("Wrong template configuration")
7464

    
7465
    for idx, disk in enumerate(disk_info):
7466
      disk_index = idx + base_index
7467
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7468
                              size=disk[constants.IDISK_SIZE],
7469
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7470
                                          disk[constants.IDISK_ADOPT]),
7471
                              iv_name="disk/%d" % disk_index,
7472
                              mode=disk[constants.IDISK_MODE])
7473
      disks.append(disk_dev)
7474

    
7475
  else:
7476
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7477
  return disks
7478

    
7479

    
7480
def _GetInstanceInfoText(instance):
7481
  """Compute that text that should be added to the disk's metadata.
7482

7483
  """
7484
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
7488
  """Calculates the ETA based on size written and total size.
7489

7490
  @param time_taken: The time taken so far
7491
  @param written: amount written so far
7492
  @param total_size: The total size of data to be written
7493
  @return: The remaining time in seconds
7494

7495
  """
7496
  avg_time = time_taken / float(written)
7497
  return (total_size - written) * avg_time
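# Worked example (hypothetical numbers): with 512 MiB of a 2048 MiB disk
# written in 30 seconds, avg_time is 30.0 / 512 s/MiB and the estimate is
# (2048 - 512) * (30.0 / 512) = 90.0 seconds. Callers must pass written > 0,
# otherwise the division above raises ZeroDivisionError.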
7498

    
7499

    
7500
def _WipeDisks(lu, instance):
7501
  """Wipes instance disks.
7502

7503
  @type lu: L{LogicalUnit}
7504
  @param lu: the logical unit on whose behalf we execute
7505
  @type instance: L{objects.Instance}
7506
  @param instance: the instance whose disks we should create
7507
  @return: the success of the wipe
7508

7509
  """
7510
  node = instance.primary_node
7511

    
7512
  for device in instance.disks:
7513
    lu.cfg.SetDiskID(device, node)
7514

    
7515
  logging.info("Pause sync of instance %s disks", instance.name)
7516
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7517

    
7518
  for idx, success in enumerate(result.payload):
7519
    if not success:
7520
      logging.warn("pause-sync of instance %s for disks %d failed",
7521
                   instance.name, idx)
7522

    
7523
  try:
7524
    for idx, device in enumerate(instance.disks):
7525
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7526
      # MAX_WIPE_CHUNK at max
7527
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7528
                            constants.MIN_WIPE_CHUNK_PERCENT)
7529
      # we _must_ make this an int, otherwise rounding errors will
7530
      # occur
7531
      wipe_chunk_size = int(wipe_chunk_size)
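      # Example (assuming the usual constants, MAX_WIPE_CHUNK = 1024 MiB and
      # MIN_WIPE_CHUNK_PERCENT = 10): a 20480 MiB disk is wiped in
      # min(1024, 2048) = 1024 MiB chunks, a 5120 MiB disk in
      # min(1024, 512) = 512 MiB chunks.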
7532

    
7533
      lu.LogInfo("* Wiping disk %d", idx)
7534
      logging.info("Wiping disk %d for instance %s, node %s using"
7535
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7536

    
7537
      offset = 0
7538
      size = device.size
7539
      last_output = 0
7540
      start_time = time.time()
7541

    
7542
      while offset < size:
7543
        wipe_size = min(wipe_chunk_size, size - offset)
7544
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7545
                      idx, offset, wipe_size)
7546
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7547
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7548
                     (idx, offset, wipe_size))
7549
        now = time.time()
7550
        offset += wipe_size
7551
        if now - last_output >= 60:
7552
          eta = _CalcEta(now - start_time, offset, size)
7553
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7554
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7555
          last_output = now
7556
  finally:
7557
    logging.info("Resume sync of instance %s disks", instance.name)
7558

    
7559
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7560

    
7561
    for idx, success in enumerate(result.payload):
7562
      if not success:
7563
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7564
                      " look at the status and troubleshoot the issue", idx)
7565
        logging.warn("resume-sync of instance %s for disks %d failed",
7566
                     instance.name, idx)
7567

    
7568

    
7569
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7570
  """Create all disks for an instance.
7571

7572
  This abstracts away some work from AddInstance.
7573

7574
  @type lu: L{LogicalUnit}
7575
  @param lu: the logical unit on whose behalf we execute
7576
  @type instance: L{objects.Instance}
7577
  @param instance: the instance whose disks we should create
7578
  @type to_skip: list
7579
  @param to_skip: list of indices to skip
7580
  @type target_node: string
7581
  @param target_node: if passed, overrides the target node for creation
7582
  @rtype: boolean
7583
  @return: the success of the creation
7584

7585
  """
7586
  info = _GetInstanceInfoText(instance)
7587
  if target_node is None:
7588
    pnode = instance.primary_node
7589
    all_nodes = instance.all_nodes
7590
  else:
7591
    pnode = target_node
7592
    all_nodes = [pnode]
7593

    
7594
  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7595
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7596
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7597

    
7598
    result.Raise("Failed to create directory '%s' on"
7599
                 " node %s" % (file_storage_dir, pnode))
7600

    
7601
  # Note: this needs to be kept in sync with adding of disks in
7602
  # LUInstanceSetParams
7603
  for idx, device in enumerate(instance.disks):
7604
    if to_skip and idx in to_skip:
7605
      continue
7606
    logging.info("Creating volume %s for instance %s",
7607
                 device.iv_name, instance.name)
7608
    #HARDCODE
7609
    for node in all_nodes:
7610
      f_create = node == pnode
7611
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7612

    
7613

    
7614
def _RemoveDisks(lu, instance, target_node=None):
7615
  """Remove all disks for an instance.
7616

7617
  This abstracts away some work from `AddInstance()` and
7618
  `RemoveInstance()`. Note that in case some of the devices couldn't
7619
  be removed, the removal will continue with the other ones (compare
7620
  with `_CreateDisks()`).
7621

7622
  @type lu: L{LogicalUnit}
7623
  @param lu: the logical unit on whose behalf we execute
7624
  @type instance: L{objects.Instance}
7625
  @param instance: the instance whose disks we should remove
7626
  @type target_node: string
7627
  @param target_node: used to override the node on which to remove the disks
7628
  @rtype: boolean
7629
  @return: the success of the removal
7630

7631
  """
7632
  logging.info("Removing block devices for instance %s", instance.name)
7633

    
7634
  all_result = True
7635
  for device in instance.disks:
7636
    if target_node:
7637
      edata = [(target_node, device)]
7638
    else:
7639
      edata = device.ComputeNodeTree(instance.primary_node)
7640
    for node, disk in edata:
7641
      lu.cfg.SetDiskID(disk, node)
7642
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7643
      if msg:
7644
        lu.LogWarning("Could not remove block device %s on node %s,"
7645
                      " continuing anyway: %s", device.iv_name, node, msg)
7646
        all_result = False
7647

    
7648
  if instance.disk_template == constants.DT_FILE:
7649
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7650
    if target_node:
7651
      tgt = target_node
7652
    else:
7653
      tgt = instance.primary_node
7654
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7655
    if result.fail_msg:
7656
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7657
                    file_storage_dir, instance.primary_node, result.fail_msg)
7658
      all_result = False
7659

    
7660
  return all_result
7661

    
7662

    
7663
def _ComputeDiskSizePerVG(disk_template, disks):
7664
  """Compute disk size requirements in the volume group
7665

7666
  """
7667
  def _compute(disks, payload):
7668
    """Universal algorithm.
7669

7670
    """
7671
    vgs = {}
7672
    for disk in disks:
7673
      vg = disk[constants.IDISK_VG]
      # accumulate per volume group (not under the literal "vg" key)
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload
7675

    
7676
    return vgs
7677

    
7678
  # Required free disk space as a function of disk and swap space
7679
  req_size_dict = {
7680
    constants.DT_DISKLESS: {},
7681
    constants.DT_PLAIN: _compute(disks, 0),
7682
    # 128 MB are added for drbd metadata for each disk
7683
    constants.DT_DRBD8: _compute(disks, 128),
7684
    constants.DT_FILE: {},
7685
    constants.DT_SHARED_FILE: {},
7686
  }
7687

    
7688
  if disk_template not in req_size_dict:
7689
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7690
                                 " is unknown" %  disk_template)
7691

    
7692
  return req_size_dict[disk_template]
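# Illustrative example (hypothetical input): for DT_DRBD8 with two disks
# {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024} and
# {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}, the result is
# {"xenvg": 3328}: each disk contributes its size plus 128 MiB of DRBD
# metadata, accumulated per volume group.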
7693

    
7694

    
7695
def _ComputeDiskSize(disk_template, disks):
7696
  """Compute disk size requirements in the volume group
7697

7698
  """
7699
  # Required free disk space as a function of disk and swap space
7700
  req_size_dict = {
7701
    constants.DT_DISKLESS: None,
7702
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7703
    # 128 MB are added for drbd metadata for each disk
7704
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7705
    constants.DT_FILE: None,
7706
    constants.DT_SHARED_FILE: 0,
7707
    constants.DT_BLOCK: 0,
7708
  }
7709

    
7710
  if disk_template not in req_size_dict:
7711
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7712
                                 " is unknown" %  disk_template)
7713

    
7714
  return req_size_dict[disk_template]
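# Illustrative example (hypothetical sizes): two disks of 1024 and 2048 MiB
# require 3072 MiB for DT_PLAIN and 3072 + 2 * 128 = 3328 MiB for DT_DRBD8,
# while the file-based, block and diskless templates need no LVM space here.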
7715

    
7716

    
7717
def _FilterVmNodes(lu, nodenames):
7718
  """Filters out non-vm_capable nodes from a list.
7719

7720
  @type lu: L{LogicalUnit}
7721
  @param lu: the logical unit for which we check
7722
  @type nodenames: list
7723
  @param nodenames: the list of nodes on which we should check
7724
  @rtype: list
7725
  @return: the list of vm-capable nodes
7726

7727
  """
7728
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
7730

    
7731

    
7732
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7733
  """Hypervisor parameter validation.
7734

7735
  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.
7737

7738
  @type lu: L{LogicalUnit}
7739
  @param lu: the logical unit for which we check
7740
  @type nodenames: list
7741
  @param nodenames: the list of nodes on which we should check
7742
  @type hvname: string
7743
  @param hvname: the name of the hypervisor we should use
7744
  @type hvparams: dict
7745
  @param hvparams: the parameters which we need to check
7746
  @raise errors.OpPrereqError: if the parameters are not valid
7747

7748
  """
7749
  nodenames = _FilterVmNodes(lu, nodenames)
7750
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7751
                                                  hvname,
7752
                                                  hvparams)
7753
  for node in nodenames:
7754
    info = hvinfo[node]
7755
    if info.offline:
7756
      continue
7757
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7758

    
7759

    
7760
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7761
  """OS parameters validation.
7762

7763
  @type lu: L{LogicalUnit}
7764
  @param lu: the logical unit for which we check
7765
  @type required: boolean
7766
  @param required: whether the validation should fail if the OS is not
7767
      found
7768
  @type nodenames: list
7769
  @param nodenames: the list of nodes on which we should check
7770
  @type osname: string
7771
  @param osname: the name of the OS we should use
7772
  @type osparams: dict
7773
  @param osparams: the parameters which we need to check
7774
  @raise errors.OpPrereqError: if the parameters are not valid
7775

7776
  """
7777
  nodenames = _FilterVmNodes(lu, nodenames)
7778
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7779
                                   [constants.OS_VALIDATE_PARAMETERS],
7780
                                   osparams)
7781
  for node, nres in result.items():
7782
    # we don't check for offline cases since this should be run only
7783
    # against the master node and/or an instance's nodes
7784
    nres.Raise("OS Parameters validation failed on node %s" % node)
7785
    if not nres.payload:
7786
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7787
                 osname, node)
7788

    
7789

    
7790
class LUInstanceCreate(LogicalUnit):
7791
  """Create an instance.
7792

7793
  """
7794
  HPATH = "instance-add"
7795
  HTYPE = constants.HTYPE_INSTANCE
7796
  REQ_BGL = False
7797

    
7798
  def CheckArguments(self):
7799
    """Check arguments.
7800

7801
    """
7802
    # do not require name_check to ease forward/backward compatibility
7803
    # for tools
7804
    if self.op.no_install and self.op.start:
7805
      self.LogInfo("No-installation mode selected, disabling startup")
7806
      self.op.start = False
7807
    # validate/normalize the instance name
7808
    self.op.instance_name = \
7809
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7810

    
7811
    if self.op.ip_check and not self.op.name_check:
7812
      # TODO: make the ip check more flexible and not depend on the name check
7813
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7814
                                 " check", errors.ECODE_INVAL)
7815

    
7816
    # check nics' parameter names
7817
    for nic in self.op.nics:
7818
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7819

    
7820
    # check disks' parameter names and consistent adopt/no-adopt strategy
7821
    has_adopt = has_no_adopt = False
7822
    for disk in self.op.disks:
7823
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7824
      if constants.IDISK_ADOPT in disk:
7825
        has_adopt = True
7826
      else:
7827
        has_no_adopt = True
7828
    if has_adopt and has_no_adopt:
7829
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7830
                                 errors.ECODE_INVAL)
7831
    if has_adopt:
7832
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7833
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7834
                                   " '%s' disk template" %
7835
                                   self.op.disk_template,
7836
                                   errors.ECODE_INVAL)
7837
      if self.op.iallocator is not None:
7838
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7839
                                   " iallocator script", errors.ECODE_INVAL)
7840
      if self.op.mode == constants.INSTANCE_IMPORT:
7841
        raise errors.OpPrereqError("Disk adoption not allowed for"
7842
                                   " instance import", errors.ECODE_INVAL)
7843
    else:
7844
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7845
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7846
                                   " but no 'adopt' parameter given" %
7847
                                   self.op.disk_template,
7848
                                   errors.ECODE_INVAL)
7849

    
7850
    self.adopt_disks = has_adopt
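    # Illustrative note (hypothetical values): an adopting request passes
    # every disk as e.g. {constants.IDISK_SIZE: 10240,
    # constants.IDISK_ADOPT: "existing-vol"}, while a normal creation omits
    # the adopt key entirely; mixing the two styles is rejected above.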
7851

    
7852
    # instance name verification
7853
    if self.op.name_check:
7854
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7855
      self.op.instance_name = self.hostname1.name
7856
      # used in CheckPrereq for ip ping check
7857
      self.check_ip = self.hostname1.ip
7858
    else:
7859
      self.check_ip = None
7860

    
7861
    # file storage checks
7862
    if (self.op.file_driver and
7863
        not self.op.file_driver in constants.FILE_DRIVER):
7864
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7865
                                 self.op.file_driver, errors.ECODE_INVAL)
7866

    
7867
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7868
      raise errors.OpPrereqError("File storage directory path not absolute",
7869
                                 errors.ECODE_INVAL)
7870

    
7871
    ### Node/iallocator related checks
7872
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7873

    
7874
    if self.op.pnode is not None:
7875
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7876
        if self.op.snode is None:
7877
          raise errors.OpPrereqError("The networked disk templates need"
7878
                                     " a mirror node", errors.ECODE_INVAL)
7879
      elif self.op.snode:
7880
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7881
                        " template")
7882
        self.op.snode = None
7883

    
7884
    self._cds = _GetClusterDomainSecret()
7885

    
7886
    if self.op.mode == constants.INSTANCE_IMPORT:
7887
      # On import force_variant must be True, because if we forced it at
7888
      # initial install, our only chance when importing it back is that it
7889
      # works again!
7890
      self.op.force_variant = True
7891

    
7892
      if self.op.no_install:
7893
        self.LogInfo("No-installation mode has no effect during import")
7894

    
7895
    elif self.op.mode == constants.INSTANCE_CREATE:
7896
      if self.op.os_type is None:
7897
        raise errors.OpPrereqError("No guest OS specified",
7898
                                   errors.ECODE_INVAL)
7899
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7900
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7901
                                   " installation" % self.op.os_type,
7902
                                   errors.ECODE_STATE)
7903
      if self.op.disk_template is None:
7904
        raise errors.OpPrereqError("No disk template specified",
7905
                                   errors.ECODE_INVAL)
7906

    
7907
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7908
      # Check handshake to ensure both clusters have the same domain secret
7909
      src_handshake = self.op.source_handshake
7910
      if not src_handshake:
7911
        raise errors.OpPrereqError("Missing source handshake",
7912
                                   errors.ECODE_INVAL)
7913

    
7914
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7915
                                                           src_handshake)
7916
      if errmsg:
7917
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7918
                                   errors.ECODE_INVAL)
7919

    
7920
      # Load and check source CA
7921
      self.source_x509_ca_pem = self.op.source_x509_ca
7922
      if not self.source_x509_ca_pem:
7923
        raise errors.OpPrereqError("Missing source X509 CA",
7924
                                   errors.ECODE_INVAL)
7925

    
7926
      try:
7927
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7928
                                                    self._cds)
7929
      except OpenSSL.crypto.Error, err:
7930
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7931
                                   (err, ), errors.ECODE_INVAL)
7932

    
7933
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7934
      if errcode is not None:
7935
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7936
                                   errors.ECODE_INVAL)
7937

    
7938
      self.source_x509_ca = cert
7939

    
7940
      src_instance_name = self.op.source_instance_name
7941
      if not src_instance_name:
7942
        raise errors.OpPrereqError("Missing source instance name",
7943
                                   errors.ECODE_INVAL)
7944

    
7945
      self.source_instance_name = \
7946
          netutils.GetHostname(name=src_instance_name).name
7947

    
7948
    else:
7949
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7950
                                 self.op.mode, errors.ECODE_INVAL)
7951

    
7952
  def ExpandNames(self):
7953
    """ExpandNames for CreateInstance.
7954

7955
    Figure out the right locks for instance creation.
7956

7957
    """
7958
    self.needed_locks = {}
7959

    
7960
    instance_name = self.op.instance_name
7961
    # this is just a preventive check, but someone might still add this
7962
    # instance in the meantime, and creation will fail at lock-add time
7963
    if instance_name in self.cfg.GetInstanceList():
7964
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7965
                                 instance_name, errors.ECODE_EXISTS)
7966

    
7967
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7968

    
7969
    if self.op.iallocator:
7970
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7971
    else:
7972
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7973
      nodelist = [self.op.pnode]
7974
      if self.op.snode is not None:
7975
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7976
        nodelist.append(self.op.snode)
7977
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7978

    
7979
    # in case of import lock the source node too
7980
    if self.op.mode == constants.INSTANCE_IMPORT:
7981
      src_node = self.op.src_node
7982
      src_path = self.op.src_path
7983

    
7984
      if src_path is None:
7985
        self.op.src_path = src_path = self.op.instance_name
7986

    
7987
      if src_node is None:
7988
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7989
        self.op.src_node = None
7990
        if os.path.isabs(src_path):
7991
          raise errors.OpPrereqError("Importing an instance from an absolute"
7992
                                     " path requires a source node option",
7993
                                     errors.ECODE_INVAL)
7994
      else:
7995
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7996
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7997
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7998
        if not os.path.isabs(src_path):
7999
          self.op.src_path = src_path = \
8000
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8001

    
8002
  def _RunAllocator(self):
8003
    """Run the allocator based on input opcode.
8004

8005
    """
8006
    nics = [n.ToDict() for n in self.nics]
8007
    ial = IAllocator(self.cfg, self.rpc,
8008
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8009
                     name=self.op.instance_name,
8010
                     disk_template=self.op.disk_template,
8011
                     tags=self.op.tags,
8012
                     os=self.op.os_type,
8013
                     vcpus=self.be_full[constants.BE_VCPUS],
8014
                     memory=self.be_full[constants.BE_MEMORY],
8015
                     disks=self.disks,
8016
                     nics=nics,
8017
                     hypervisor=self.op.hypervisor,
8018
                     )
8019

    
8020
    ial.Run(self.op.iallocator)
8021

    
8022
    if not ial.success:
8023
      raise errors.OpPrereqError("Can't compute nodes using"
8024
                                 " iallocator '%s': %s" %
8025
                                 (self.op.iallocator, ial.info),
8026
                                 errors.ECODE_NORES)
8027
    if len(ial.result) != ial.required_nodes:
8028
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8029
                                 " of nodes (%s), required %s" %
8030
                                 (self.op.iallocator, len(ial.result),
8031
                                  ial.required_nodes), errors.ECODE_FAULT)
8032
    self.op.pnode = ial.result[0]
8033
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8034
                 self.op.instance_name, self.op.iallocator,
8035
                 utils.CommaJoin(ial.result))
8036
    if ial.required_nodes == 2:
8037
      self.op.snode = ial.result[1]
8038

    
8039
  def BuildHooksEnv(self):
8040
    """Build hooks env.
8041

8042
    This runs on master, primary and secondary nodes of the instance.
8043

8044
    """
8045
    env = {
8046
      "ADD_MODE": self.op.mode,
8047
      }
8048
    if self.op.mode == constants.INSTANCE_IMPORT:
8049
      env["SRC_NODE"] = self.op.src_node
8050
      env["SRC_PATH"] = self.op.src_path
8051
      env["SRC_IMAGES"] = self.src_images
8052

    
8053
    env.update(_BuildInstanceHookEnv(
8054
      name=self.op.instance_name,
8055
      primary_node=self.op.pnode,
8056
      secondary_nodes=self.secondaries,
8057
      status=self.op.start,
8058
      os_type=self.op.os_type,
8059
      memory=self.be_full[constants.BE_MEMORY],
8060
      vcpus=self.be_full[constants.BE_VCPUS],
8061
      nics=_NICListToTuple(self, self.nics),
8062
      disk_template=self.op.disk_template,
8063
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8064
             for d in self.disks],
8065
      bep=self.be_full,
8066
      hvp=self.hv_full,
8067
      hypervisor_name=self.op.hypervisor,
8068
      tags=self.op.tags,
8069
    ))
8070

    
8071
    return env
8072

    
8073
  def BuildHooksNodes(self):
8074
    """Build hooks nodes.
8075

8076
    """
8077
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8078
    return nl, nl
8079

    
8080
  def _ReadExportInfo(self):
8081
    """Reads the export information from disk.
8082

8083
    It will override the opcode source node and path with the actual
8084
    information, if these two were not specified before.
8085

8086
    @return: the export information
8087

8088
    """
8089
    assert self.op.mode == constants.INSTANCE_IMPORT
8090

    
8091
    src_node = self.op.src_node
8092
    src_path = self.op.src_path
8093

    
8094
    if src_node is None:
8095
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8096
      exp_list = self.rpc.call_export_list(locked_nodes)
8097
      found = False
8098
      for node in exp_list:
8099
        if exp_list[node].fail_msg:
8100
          continue
8101
        if src_path in exp_list[node].payload:
8102
          found = True
8103
          self.op.src_node = src_node = node
8104
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8105
                                                       src_path)
8106
          break
8107
      if not found:
8108
        raise errors.OpPrereqError("No export found for relative path %s" %
8109
                                    src_path, errors.ECODE_INVAL)
8110

    
8111
    _CheckNodeOnline(self, src_node)
8112
    result = self.rpc.call_export_info(src_node, src_path)
8113
    result.Raise("No export or invalid export found in dir %s" % src_path)
8114

    
8115
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8116
    if not export_info.has_section(constants.INISECT_EXP):
8117
      raise errors.ProgrammerError("Corrupted export config",
8118
                                   errors.ECODE_ENVIRON)
8119

    
8120
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8121
    if (int(ei_version) != constants.EXPORT_VERSION):
8122
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8123
                                 (ei_version, constants.EXPORT_VERSION),
8124
                                 errors.ECODE_ENVIRON)
8125
    return export_info
8126

    
8127
  def _ReadExportParams(self, einfo):
8128
    """Use export parameters as defaults.
8129

8130
    If the opcode doesn't specify (i.e. override) some instance
    parameters, try to use them from the export information, if
    it declares them.
8133

8134
    """
8135
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8136

    
8137
    if self.op.disk_template is None:
8138
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8139
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8140
                                          "disk_template")
8141
      else:
8142
        raise errors.OpPrereqError("No disk template specified and the export"
8143
                                   " is missing the disk_template information",
8144
                                   errors.ECODE_INVAL)
8145

    
8146
    if not self.op.disks:
8147
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8148
        disks = []
8149
        # TODO: import the disk iv_name too
8150
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8151
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8152
          disks.append({constants.IDISK_SIZE: disk_sz})
8153
        self.op.disks = disks
8154
      else:
8155
        raise errors.OpPrereqError("No disk info specified and the export"
8156
                                   " is missing the disk information",
8157
                                   errors.ECODE_INVAL)
8158

    
8159
    if (not self.op.nics and
8160
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8161
      nics = []
8162
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8163
        ndict = {}
8164
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8165
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8166
          ndict[name] = v
8167
        nics.append(ndict)
8168
      self.op.nics = nics
8169

    
8170
    if (self.op.hypervisor is None and
8171
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8172
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8173
    if einfo.has_section(constants.INISECT_HYP):
8174
      # use the export parameters but do not override the ones
8175
      # specified by the user
8176
      for name, value in einfo.items(constants.INISECT_HYP):
8177
        if name not in self.op.hvparams:
8178
          self.op.hvparams[name] = value
8179

    
8180
    if einfo.has_section(constants.INISECT_BEP):
8181
      # use the parameters, without overriding
8182
      for name, value in einfo.items(constants.INISECT_BEP):
8183
        if name not in self.op.beparams:
8184
          self.op.beparams[name] = value
8185
    else:
8186
      # try to read the parameters old style, from the main section
8187
      for name in constants.BES_PARAMETERS:
8188
        if (name not in self.op.beparams and
8189
            einfo.has_option(constants.INISECT_INS, name)):
8190
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8191

    
8192
    if einfo.has_section(constants.INISECT_OSP):
8193
      # use the parameters, without overriding
8194
      for name, value in einfo.items(constants.INISECT_OSP):
8195
        if name not in self.op.osparams:
8196
          self.op.osparams[name] = value
8197

    
8198
  def _RevertToDefaults(self, cluster):
8199
    """Revert the instance parameters to the default values.
8200

8201
    """
8202
    # hvparams
8203
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8204
    for name in self.op.hvparams.keys():
8205
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8206
        del self.op.hvparams[name]
8207
    # beparams
8208
    be_defs = cluster.SimpleFillBE({})
8209
    for name in self.op.beparams.keys():
8210
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8211
        del self.op.beparams[name]
8212
    # nic params
8213
    nic_defs = cluster.SimpleFillNIC({})
8214
    for nic in self.op.nics:
8215
      for name in constants.NICS_PARAMETERS:
8216
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8217
          del nic[name]
8218
    # osparams
8219
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8220
    for name in self.op.osparams.keys():
8221
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8222
        del self.op.osparams[name]
8223

    
8224
  def CheckPrereq(self):
8225
    """Check prerequisites.
8226

8227
    """
8228
    if self.op.mode == constants.INSTANCE_IMPORT:
8229
      export_info = self._ReadExportInfo()
8230
      self._ReadExportParams(export_info)
8231

    
8232
    if (not self.cfg.GetVGName() and
8233
        self.op.disk_template not in constants.DTS_NOT_LVM):
8234
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8235
                                 " instances", errors.ECODE_STATE)
8236

    
8237
    if self.op.hypervisor is None:
8238
      self.op.hypervisor = self.cfg.GetHypervisorType()
8239

    
8240
    cluster = self.cfg.GetClusterInfo()
8241
    enabled_hvs = cluster.enabled_hypervisors
8242
    if self.op.hypervisor not in enabled_hvs:
8243
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8244
                                 " cluster (%s)" % (self.op.hypervisor,
8245
                                  ",".join(enabled_hvs)),
8246
                                 errors.ECODE_STATE)
8247

    
8248
    # Check tag validity
8249
    for tag in self.op.tags:
8250
      objects.TaggableObject.ValidateTag(tag)
8251

    
8252
    # check hypervisor parameter syntax (locally)
8253
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8254
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8255
                                      self.op.hvparams)
8256
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8257
    hv_type.CheckParameterSyntax(filled_hvp)
8258
    self.hv_full = filled_hvp
8259
    # check that we don't specify global parameters on an instance
8260
    _CheckGlobalHvParams(self.op.hvparams)
8261

    
8262
    # fill and remember the beparams dict
8263
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8264
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8265

    
8266
    # build os parameters
8267
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8268

    
8269
    # now that hvp/bep are in final format, let's reset to defaults,
8270
    # if told to do so
8271
    if self.op.identify_defaults:
8272
      self._RevertToDefaults(cluster)
8273

    
8274
    # NIC buildup
8275
    self.nics = []
8276
    for idx, nic in enumerate(self.op.nics):
8277
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8278
      nic_mode = nic_mode_req
8279
      if nic_mode is None:
8280
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8281

    
8282
      # in routed mode, for the first nic, the default ip is 'auto'
8283
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8284
        default_ip_mode = constants.VALUE_AUTO
8285
      else:
8286
        default_ip_mode = constants.VALUE_NONE
8287

    
8288
      # ip validity checks
8289
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8290
      if ip is None or ip.lower() == constants.VALUE_NONE:
8291
        nic_ip = None
8292
      elif ip.lower() == constants.VALUE_AUTO:
8293
        if not self.op.name_check:
8294
          raise errors.OpPrereqError("IP address set to auto but name checks"
8295
                                     " have been skipped",
8296
                                     errors.ECODE_INVAL)
8297
        nic_ip = self.hostname1.ip
8298
      else:
8299
        if not netutils.IPAddress.IsValid(ip):
8300
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8301
                                     errors.ECODE_INVAL)
8302
        nic_ip = ip
8303

    
8304
      # TODO: check the ip address for uniqueness
8305
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8306
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8307
                                   errors.ECODE_INVAL)
8308

    
8309
      # MAC address verification
8310
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8311
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8312
        mac = utils.NormalizeAndValidateMac(mac)
8313

    
8314
        try:
8315
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8316
        except errors.ReservationError:
8317
          raise errors.OpPrereqError("MAC address %s already in use"
8318
                                     " in cluster" % mac,
8319
                                     errors.ECODE_NOTUNIQUE)
8320

    
8321
      #  Build nic parameters
8322
      link = nic.get(constants.INIC_LINK, None)
8323
      nicparams = {}
8324
      if nic_mode_req:
8325
        nicparams[constants.NIC_MODE] = nic_mode_req
8326
      if link:
8327
        nicparams[constants.NIC_LINK] = link
8328

    
8329
      check_params = cluster.SimpleFillNIC(nicparams)
8330
      objects.NIC.CheckParameterSyntax(check_params)
8331
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
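      # Illustrative example (not executed): a request entry such as
      #   {constants.INIC_MODE: constants.NIC_MODE_BRIDGED,
      #    constants.INIC_MAC: constants.VALUE_AUTO}
      # comes out of this iteration as an objects.NIC with ip=None,
      # mac="auto" (replaced by a generated address further down) and
      # nicparams={NIC_MODE: "bridged"}.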
    # disk checks/pre-build
8334
    default_vg = self.cfg.GetVGName()
8335
    self.disks = []
8336
    for disk in self.op.disks:
8337
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8338
      if mode not in constants.DISK_ACCESS_SET:
8339
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8340
                                   mode, errors.ECODE_INVAL)
8341
      size = disk.get(constants.IDISK_SIZE, None)
8342
      if size is None:
8343
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8344
      try:
8345
        size = int(size)
8346
      except (TypeError, ValueError):
8347
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8348
                                   errors.ECODE_INVAL)
8349

    
8350
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8351
      new_disk = {
8352
        constants.IDISK_SIZE: size,
8353
        constants.IDISK_MODE: mode,
8354
        constants.IDISK_VG: data_vg,
8355
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8356
        }
8357
      if constants.IDISK_ADOPT in disk:
8358
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8359
      self.disks.append(new_disk)
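      # Illustrative example (not executed): a minimal request of
      #   {constants.IDISK_SIZE: 1024}
      # becomes {size: 1024, mode: "rw", vg: <cluster VG>, metavg: <cluster
      # VG>} here; IDISK_ADOPT is only carried over when adopting existing
      # volumes.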
    if self.op.mode == constants.INSTANCE_IMPORT:
8362

    
8363
      # Check that the new instance doesn't have less disks than the export
8364
      instance_disks = len(self.disks)
8365
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8366
      if instance_disks < export_disks:
8367
        raise errors.OpPrereqError("Not enough disks to import."
8368
                                   " (instance: %d, export: %d)" %
8369
                                   (instance_disks, export_disks),
8370
                                   errors.ECODE_INVAL)
8371

    
8372
      disk_images = []
8373
      for idx in range(export_disks):
8374
        option = 'disk%d_dump' % idx
8375
        if export_info.has_option(constants.INISECT_INS, option):
8376
          # FIXME: are the old os-es, disk sizes, etc. useful?
8377
          export_name = export_info.get(constants.INISECT_INS, option)
8378
          image = utils.PathJoin(self.op.src_path, export_name)
8379
          disk_images.append(image)
8380
        else:
8381
          disk_images.append(False)
8382

    
8383
      self.src_images = disk_images
8384

    
8385
      old_name = export_info.get(constants.INISECT_INS, 'name')
8386
      try:
8387
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8388
      except (TypeError, ValueError), err:
8389
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8390
                                   " an integer: %s" % str(err),
8391
                                   errors.ECODE_STATE)
8392
      if self.op.instance_name == old_name:
8393
        for idx, nic in enumerate(self.nics):
8394
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8395
            nic_mac_ini = 'nic%d_mac' % idx
8396
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8397

    
8398
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8399

    
8400
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8401
    if self.op.ip_check:
8402
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8403
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8404
                                   (self.check_ip, self.op.instance_name),
8405
                                   errors.ECODE_NOTUNIQUE)
8406

    
8407
    #### mac address generation
8408
    # By generating here the mac address both the allocator and the hooks get
8409
    # the real final mac address rather than the 'auto' or 'generate' value.
8410
    # There is a race condition between the generation and the instance object
8411
    # creation, which means that we know the mac is valid now, but we're not
8412
    # sure it will be when we actually add the instance. If things go bad
8413
    # adding the instance will abort because of a duplicate mac, and the
8414
    # creation job will fail.
8415
    for nic in self.nics:
8416
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8417
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8418

    
8419
    #### allocator run
8420

    
8421
    if self.op.iallocator is not None:
8422
      self._RunAllocator()
8423

    
8424
    #### node related checks
8425

    
8426
    # check primary node
8427
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8428
    assert self.pnode is not None, \
8429
      "Cannot retrieve locked node %s" % self.op.pnode
8430
    if pnode.offline:
8431
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8432
                                 pnode.name, errors.ECODE_STATE)
8433
    if pnode.drained:
8434
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8435
                                 pnode.name, errors.ECODE_STATE)
8436
    if not pnode.vm_capable:
8437
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8438
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8439

    
8440
    self.secondaries = []
8441

    
8442
    # mirror node verification
8443
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8444
      if self.op.snode == pnode.name:
8445
        raise errors.OpPrereqError("The secondary node cannot be the"
8446
                                   " primary node", errors.ECODE_INVAL)
8447
      _CheckNodeOnline(self, self.op.snode)
8448
      _CheckNodeNotDrained(self, self.op.snode)
8449
      _CheckNodeVmCapable(self, self.op.snode)
8450
      self.secondaries.append(self.op.snode)
8451

    
8452
    nodenames = [pnode.name] + self.secondaries
8453

    
8454
    if not self.adopt_disks:
8455
      # Check lv size requirements, if not adopting
8456
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8457
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8458

    
8459
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8460
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8461
                                disk[constants.IDISK_ADOPT])
8462
                     for disk in self.disks])
8463
      if len(all_lvs) != len(self.disks):
8464
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8465
                                   errors.ECODE_INVAL)
8466
      for lv_name in all_lvs:
8467
        try:
8468
          # FIXME: lv_name here is "vg/lv"; we need to ensure that other
          # calls to ReserveLV use the same syntax
8470
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8471
        except errors.ReservationError:
8472
          raise errors.OpPrereqError("LV named %s used by another instance" %
8473
                                     lv_name, errors.ECODE_NOTUNIQUE)
8474

    
8475
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8476
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8477

    
8478
      node_lvs = self.rpc.call_lv_list([pnode.name],
8479
                                       vg_names.payload.keys())[pnode.name]
8480
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8481
      node_lvs = node_lvs.payload
8482

    
8483
      delta = all_lvs.difference(node_lvs.keys())
8484
      if delta:
8485
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8486
                                   utils.CommaJoin(delta),
8487
                                   errors.ECODE_INVAL)
8488
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8489
      if online_lvs:
8490
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8491
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8492
                                   errors.ECODE_STATE)
8493
      # update the size of disk based on what is found
8494
      for dsk in self.disks:
8495
        dsk[constants.IDISK_SIZE] = \
8496
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8497
                                        dsk[constants.IDISK_ADOPT])][0]))
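      # Note: each node_lvs payload entry is expected to be a tuple whose
      # first element is the LV size in MiB (used just above) and whose
      # third element flags an online/in-use LV (checked a few lines
      # earlier).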
    elif self.op.disk_template == constants.DT_BLOCK:
8500
      # Normalize and de-duplicate device paths
8501
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8502
                       for disk in self.disks])
8503
      if len(all_disks) != len(self.disks):
8504
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8505
                                   errors.ECODE_INVAL)
8506
      baddisks = [d for d in all_disks
8507
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8508
      if baddisks:
8509
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8510
                                   " cannot be adopted" %
8511
                                   (", ".join(baddisks),
8512
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8513
                                   errors.ECODE_INVAL)
8514

    
8515
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8516
                                            list(all_disks))[pnode.name]
8517
      node_disks.Raise("Cannot get block device information from node %s" %
8518
                       pnode.name)
8519
      node_disks = node_disks.payload
8520
      delta = all_disks.difference(node_disks.keys())
8521
      if delta:
8522
        raise errors.OpPrereqError("Missing block device(s): %s" %
8523
                                   utils.CommaJoin(delta),
8524
                                   errors.ECODE_INVAL)
8525
      for dsk in self.disks:
8526
        dsk[constants.IDISK_SIZE] = \
8527
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8528

    
8529
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8530

    
8531
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8532
    # check OS parameters (remotely)
8533
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8534

    
8535
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8536

    
8537
    # memory check on primary node
8538
    if self.op.start:
8539
      _CheckNodeFreeMemory(self, self.pnode.name,
8540
                           "creating instance %s" % self.op.instance_name,
8541
                           self.be_full[constants.BE_MEMORY],
8542
                           self.op.hypervisor)
8543

    
8544
    self.dry_run_result = list(nodenames)
8545

    
8546
  def Exec(self, feedback_fn):
8547
    """Create and add the instance to the cluster.
8548

8549
    """
8550
    instance = self.op.instance_name
8551
    pnode_name = self.pnode.name
8552

    
8553
    ht_kind = self.op.hypervisor
8554
    if ht_kind in constants.HTS_REQ_PORT:
8555
      network_port = self.cfg.AllocatePort()
8556
    else:
8557
      network_port = None
8558

    
8559
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8560
      # this is needed because os.path.join does not accept None arguments
8561
      if self.op.file_storage_dir is None:
8562
        string_file_storage_dir = ""
8563
      else:
8564
        string_file_storage_dir = self.op.file_storage_dir
8565

    
8566
      # build the full file storage dir path
8567
      if self.op.disk_template == constants.DT_SHARED_FILE:
8568
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8569
      else:
8570
        get_fsd_fn = self.cfg.GetFileStorageDir
8571

    
8572
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
8573
                                        string_file_storage_dir, instance)
8574
    else:
8575
      file_storage_dir = ""
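    # Illustrative example (assumed paths): with a cluster file storage
    # directory of /srv/ganeti/file-storage and no explicit
    # file_storage_dir in the opcode, a file-based instance "foo" ends up
    # under /srv/ganeti/file-storage/foo.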
    disks = _GenerateDiskTemplate(self,
8578
                                  self.op.disk_template,
8579
                                  instance, pnode_name,
8580
                                  self.secondaries,
8581
                                  self.disks,
8582
                                  file_storage_dir,
8583
                                  self.op.file_driver,
8584
                                  0,
8585
                                  feedback_fn)
8586

    
8587
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8588
                            primary_node=pnode_name,
8589
                            nics=self.nics, disks=disks,
8590
                            disk_template=self.op.disk_template,
8591
                            admin_up=False,
8592
                            network_port=network_port,
8593
                            beparams=self.op.beparams,
8594
                            hvparams=self.op.hvparams,
8595
                            hypervisor=self.op.hypervisor,
8596
                            osparams=self.op.osparams,
8597
                            tags=self.op.tags,
8598
                            )
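    # The instance object is deliberately created with admin_up=False; it
    # is only flipped to True (and the config updated) at the end of this
    # method if self.op.start is set, after disks and the OS have been set
    # up.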
    if self.adopt_disks:
8601
      if self.op.disk_template == constants.DT_PLAIN:
8602
        # rename LVs to the newly-generated names; we need to construct
8603
        # 'fake' LV disks with the old data, plus the new unique_id
8604
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8605
        rename_to = []
8606
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8607
          rename_to.append(t_dsk.logical_id)
8608
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8609
          self.cfg.SetDiskID(t_dsk, pnode_name)
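        # The rename list below pairs each temporary disk object (whose
        # logical_id now points at the adopted LV's current name) with the
        # freshly generated name saved in rename_to, i.e. the adopted LVs
        # are renamed within their VG to the names just generated for them.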
        result = self.rpc.call_blockdev_rename(pnode_name,
8611
                                               zip(tmp_disks, rename_to))
8612
        result.Raise("Failed to rename adoped LVs")
8613
    else:
8614
      feedback_fn("* creating instance disks...")
8615
      try:
8616
        _CreateDisks(self, iobj)
8617
      except errors.OpExecError:
8618
        self.LogWarning("Device creation failed, reverting...")
8619
        try:
8620
          _RemoveDisks(self, iobj)
8621
        finally:
8622
          self.cfg.ReleaseDRBDMinors(instance)
8623
          raise
8624

    
8625
    feedback_fn("adding instance %s to cluster config" % instance)
8626

    
8627
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8628

    
8629
    # Declare that we don't want to remove the instance lock anymore, as we've
8630
    # added the instance to the config
8631
    del self.remove_locks[locking.LEVEL_INSTANCE]
8632

    
8633
    if self.op.mode == constants.INSTANCE_IMPORT:
8634
      # Release unused nodes
8635
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8636
    else:
8637
      # Release all nodes
8638
      _ReleaseLocks(self, locking.LEVEL_NODE)
8639

    
8640
    disk_abort = False
8641
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8642
      feedback_fn("* wiping instance disks...")
8643
      try:
8644
        _WipeDisks(self, iobj)
8645
      except errors.OpExecError, err:
8646
        logging.exception("Wiping disks failed")
8647
        self.LogWarning("Wiping instance disks failed (%s)", err)
8648
        disk_abort = True
8649

    
8650
    if disk_abort:
8651
      # Something is already wrong with the disks, don't do anything else
8652
      pass
8653
    elif self.op.wait_for_sync:
8654
      disk_abort = not _WaitForSync(self, iobj)
8655
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8656
      # make sure the disks are not degraded (still sync-ing is ok)
8657
      time.sleep(15)
8658
      feedback_fn("* checking mirrors status")
8659
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8660
    else:
8661
      disk_abort = False
8662

    
8663
    if disk_abort:
8664
      _RemoveDisks(self, iobj)
8665
      self.cfg.RemoveInstance(iobj.name)
8666
      # Make sure the instance lock gets removed
8667
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8668
      raise errors.OpExecError("There are some degraded disks for"
8669
                               " this instance")
8670

    
8671
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8672
      if self.op.mode == constants.INSTANCE_CREATE:
8673
        if not self.op.no_install:
8674
          feedback_fn("* running the instance OS create scripts...")
8675
          # FIXME: pass debug option from opcode to backend
8676
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8677
                                                 self.op.debug_level)
8678
          result.Raise("Could not add os for instance %s"
8679
                       " on node %s" % (instance, pnode_name))
8680

    
8681
      elif self.op.mode == constants.INSTANCE_IMPORT:
8682
        feedback_fn("* running the instance OS import scripts...")
8683

    
8684
        transfers = []
8685

    
8686
        for idx, image in enumerate(self.src_images):
8687
          if not image:
8688
            continue
8689

    
8690
          # FIXME: pass debug option from opcode to backend
8691
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8692
                                             constants.IEIO_FILE, (image, ),
8693
                                             constants.IEIO_SCRIPT,
8694
                                             (iobj.disks[idx], idx),
8695
                                             None)
8696
          transfers.append(dt)
8697

    
8698
        import_result = \
8699
          masterd.instance.TransferInstanceData(self, feedback_fn,
8700
                                                self.op.src_node, pnode_name,
8701
                                                self.pnode.secondary_ip,
8702
                                                iobj, transfers)
8703
        if not compat.all(import_result):
8704
          self.LogWarning("Some disks for instance %s on node %s were not"
8705
                          " imported successfully" % (instance, pnode_name))
8706

    
8707
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8708
        feedback_fn("* preparing remote import...")
8709
        # The source cluster will stop the instance before attempting to make a
8710
        # connection. In some cases stopping an instance can take a long time,
8711
        # hence the shutdown timeout is added to the connection timeout.
8712
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8713
                           self.op.source_shutdown_timeout)
8714
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8715

    
8716
        assert iobj.primary_node == self.pnode.name
8717
        disk_results = \
8718
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8719
                                        self.source_x509_ca,
8720
                                        self._cds, timeouts)
8721
        if not compat.all(disk_results):
8722
          # TODO: Should the instance still be started, even if some disks
8723
          # failed to import (valid for local imports, too)?
8724
          self.LogWarning("Some disks for instance %s on node %s were not"
8725
                          " imported successfully" % (instance, pnode_name))
8726

    
8727
        # Run rename script on newly imported instance
8728
        assert iobj.name == instance
8729
        feedback_fn("Running rename script for %s" % instance)
8730
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8731
                                                   self.source_instance_name,
8732
                                                   self.op.debug_level)
8733
        if result.fail_msg:
8734
          self.LogWarning("Failed to run rename script for %s on node"
8735
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8736

    
8737
      else:
8738
        # also checked in the prereq part
8739
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8740
                                     % self.op.mode)
8741

    
8742
    if self.op.start:
8743
      iobj.admin_up = True
8744
      self.cfg.Update(iobj, feedback_fn)
8745
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8746
      feedback_fn("* starting instance...")
8747
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8748
      result.Raise("Could not start instance")
8749

    
8750
    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
8754
  """Connect to an instance's console.
8755

8756
  This is somewhat special in that it returns the command line that
8757
  you need to run on the master node in order to connect to the
8758
  console.
8759

8760
  """
8761
  REQ_BGL = False
8762

    
8763
  def ExpandNames(self):
8764
    self._ExpandAndLockInstance()
8765

    
8766
  def CheckPrereq(self):
8767
    """Check prerequisites.
8768

8769
    This checks that the instance is in the cluster.
8770

8771
    """
8772
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8773
    assert self.instance is not None, \
8774
      "Cannot retrieve locked instance %s" % self.op.instance_name
8775
    _CheckNodeOnline(self, self.instance.primary_node)
8776

    
8777
  def Exec(self, feedback_fn):
8778
    """Connect to the console of an instance
8779

8780
    """
8781
    instance = self.instance
8782
    node = instance.primary_node
8783

    
8784
    node_insts = self.rpc.call_instance_list([node],
8785
                                             [instance.hypervisor])[node]
8786
    node_insts.Raise("Can't get node information from %s" % node)
8787

    
8788
    if instance.name not in node_insts.payload:
8789
      if instance.admin_up:
8790
        state = constants.INSTST_ERRORDOWN
8791
      else:
8792
        state = constants.INSTST_ADMINDOWN
8793
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8794
                               (instance.name, state))
8795

    
8796
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8797

    
8798
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8799

    
8800

    
8801
def _GetInstanceConsole(cluster, instance):
8802
  """Returns console information for an instance.
8803

8804
  @type cluster: L{objects.Cluster}
8805
  @type instance: L{objects.Instance}
8806
  @rtype: dict
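  @return: the hypervisor-provided console object, serialized via ToDict()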
  """
8809
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
8810
  # beparams and hvparams are passed separately, to avoid editing the
8811
  # instance and then saving the defaults in the instance itself.
8812
  hvparams = cluster.FillHV(instance)
8813
  beparams = cluster.FillBE(instance)
8814
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8815

    
8816
  assert console.instance == instance.name
8817
  assert console.Validate()
8818

    
8819
  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
8823
  """Replace the disks of an instance.
8824

8825
  """
8826
  HPATH = "mirrors-replace"
8827
  HTYPE = constants.HTYPE_INSTANCE
8828
  REQ_BGL = False
8829

    
8830
  def CheckArguments(self):
8831
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8832
                                  self.op.iallocator)
8833

    
8834
  def ExpandNames(self):
8835
    self._ExpandAndLockInstance()
8836

    
8837
    assert locking.LEVEL_NODE not in self.needed_locks
8838
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8839

    
8840
    assert self.op.iallocator is None or self.op.remote_node is None, \
8841
      "Conflicting options"
8842

    
8843
    if self.op.remote_node is not None:
8844
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8845

    
8846
      # Warning: do not remove the locking of the new secondary here
8847
      # unless DRBD8.AddChildren is changed to work in parallel;
8848
      # currently it doesn't since parallel invocations of
8849
      # FindUnusedMinor will conflict
8850
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8851
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8852
    else:
8853
      self.needed_locks[locking.LEVEL_NODE] = []
8854
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8855

    
8856
      if self.op.iallocator is not None:
8857
        # iallocator will select a new node in the same group
8858
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
8859

    
8860
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8861
                                   self.op.iallocator, self.op.remote_node,
8862
                                   self.op.disks, False, self.op.early_release)
8863

    
8864
    self.tasklets = [self.replacer]
8865

    
8866
  def DeclareLocks(self, level):
8867
    if level == locking.LEVEL_NODEGROUP:
8868
      assert self.op.remote_node is None
8869
      assert self.op.iallocator is not None
8870
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8871

    
8872
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
8873
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8874
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8875

    
8876
    elif level == locking.LEVEL_NODE:
8877
      if self.op.iallocator is not None:
8878
        assert self.op.remote_node is None
8879
        assert not self.needed_locks[locking.LEVEL_NODE]
8880

    
8881
        # Lock member nodes of all locked groups
8882
        self.needed_locks[locking.LEVEL_NODE] = [node_name
8883
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8884
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8885
      else:
8886
        self._LockInstancesNodes()
8887

    
8888
  def BuildHooksEnv(self):
8889
    """Build hooks env.
8890

8891
    This runs on the master, the primary and all the secondaries.
8892

8893
    """
8894
    instance = self.replacer.instance
8895
    env = {
8896
      "MODE": self.op.mode,
8897
      "NEW_SECONDARY": self.op.remote_node,
8898
      "OLD_SECONDARY": instance.secondary_nodes[0],
8899
      }
8900
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8901
    return env
8902

    
8903
  def BuildHooksNodes(self):
8904
    """Build hooks nodes.
8905

8906
    """
8907
    instance = self.replacer.instance
8908
    nl = [
8909
      self.cfg.GetMasterNode(),
8910
      instance.primary_node,
8911
      ]
8912
    if self.op.remote_node is not None:
8913
      nl.append(self.op.remote_node)
8914
    return nl, nl
8915

    
8916
  def CheckPrereq(self):
8917
    """Check prerequisites.
8918

8919
    """
8920
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8921
            self.op.iallocator is None)
8922

    
8923
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8924
    if owned_groups:
8925
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8926
      if owned_groups != groups:
8927
        raise errors.OpExecError("Node groups used by instance '%s' changed"
8928
                                 " since lock was acquired, current list is %r,"
8929
                                 " used to be '%s'" %
8930
                                 (self.op.instance_name,
8931
                                  utils.CommaJoin(groups),
8932
                                  utils.CommaJoin(owned_groups)))
8933

    
8934
    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
8938
  """Replaces disks for an instance.
8939

8940
  Note: Locking is not within the scope of this class.
8941

8942
  """
8943
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8944
               disks, delay_iallocator, early_release):
8945
    """Initializes this class.
8946

8947
    """
8948
    Tasklet.__init__(self, lu)
8949

    
8950
    # Parameters
8951
    self.instance_name = instance_name
8952
    self.mode = mode
8953
    self.iallocator_name = iallocator_name
8954
    self.remote_node = remote_node
8955
    self.disks = disks
8956
    self.delay_iallocator = delay_iallocator
8957
    self.early_release = early_release
8958

    
8959
    # Runtime data
8960
    self.instance = None
8961
    self.new_node = None
8962
    self.target_node = None
8963
    self.other_node = None
8964
    self.remote_node_info = None
8965
    self.node_secondary_ip = None
8966

    
8967
  @staticmethod
8968
  def CheckArguments(mode, remote_node, iallocator):
8969
    """Helper function for users of this class.
8970

8971
    """
8972
    # check for valid parameter combination
8973
    if mode == constants.REPLACE_DISK_CHG:
8974
      if remote_node is None and iallocator is None:
8975
        raise errors.OpPrereqError("When changing the secondary either an"
8976
                                   " iallocator script must be used or the"
8977
                                   " new node given", errors.ECODE_INVAL)
8978

    
8979
      if remote_node is not None and iallocator is not None:
8980
        raise errors.OpPrereqError("Give either the iallocator or the new"
8981
                                   " secondary, not both", errors.ECODE_INVAL)
8982

    
8983
    elif remote_node is not None or iallocator is not None:
8984
      # Not replacing the secondary
8985
      raise errors.OpPrereqError("The iallocator and new node options can"
8986
                                 " only be used when changing the"
8987
                                 " secondary node", errors.ECODE_INVAL)
  @staticmethod
8990
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8991
    """Compute a new secondary node using an IAllocator.
8992

8993
    """
8994
    ial = IAllocator(lu.cfg, lu.rpc,
8995
                     mode=constants.IALLOCATOR_MODE_RELOC,
8996
                     name=instance_name,
8997
                     relocate_from=relocate_from)
8998

    
8999
    ial.Run(iallocator_name)
9000

    
9001
    if not ial.success:
9002
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9003
                                 " %s" % (iallocator_name, ial.info),
9004
                                 errors.ECODE_NORES)
9005

    
9006
    if len(ial.result) != ial.required_nodes:
9007
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9008
                                 " of nodes (%s), required %s" %
9009
                                 (iallocator_name,
9010
                                  len(ial.result), ial.required_nodes),
9011
                                 errors.ECODE_FAULT)
9012

    
9013
    remote_node_name = ial.result[0]
9014

    
9015
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9016
               instance_name, remote_node_name)
9017

    
9018
    return remote_node_name
9019

    
9020
  def _FindFaultyDisks(self, node_name):
9021
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9022
                                    node_name, True)
9023

    
9024
  def _CheckDisksActivated(self, instance):
9025
    """Checks if the instance disks are activated.
9026

9027
    @param instance: The instance to check disks
9028
    @return: True if they are activated, False otherwise
9029

9030
    """
9031
    nodes = instance.all_nodes
9032

    
9033
    for idx, dev in enumerate(instance.disks):
9034
      for node in nodes:
9035
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9036
        self.cfg.SetDiskID(dev, node)
9037

    
9038
        result = self.rpc.call_blockdev_find(node, dev)
9039

    
9040
        if result.offline:
9041
          continue
9042
        elif result.fail_msg or not result.payload:
9043
          return False
9044

    
9045
    return True
9046

    
9047
  def CheckPrereq(self):
9048
    """Check prerequisites.
9049

9050
    This checks that the instance is in the cluster.
9051

9052
    """
9053
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9054
    assert instance is not None, \
9055
      "Cannot retrieve locked instance %s" % self.instance_name
9056

    
9057
    if instance.disk_template != constants.DT_DRBD8:
9058
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9059
                                 " instances", errors.ECODE_INVAL)
9060

    
9061
    if len(instance.secondary_nodes) != 1:
9062
      raise errors.OpPrereqError("The instance has a strange layout,"
9063
                                 " expected one secondary but found %d" %
9064
                                 len(instance.secondary_nodes),
9065
                                 errors.ECODE_FAULT)
9066

    
9067
    if not self.delay_iallocator:
9068
      self._CheckPrereq2()
9069

    
9070
  def _CheckPrereq2(self):
9071
    """Check prerequisites, second part.
9072

9073
    This function should always be part of CheckPrereq. It was separated
    out and is now called from Exec because, during node evacuation, the
    iallocator would otherwise only see an unmodified cluster model, with
    none of the planned changes taken into account.
9077

9078
    """
9079
    instance = self.instance
9080
    secondary_node = instance.secondary_nodes[0]
9081

    
9082
    if self.iallocator_name is None:
9083
      remote_node = self.remote_node
9084
    else:
9085
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9086
                                       instance.name, instance.secondary_nodes)
9087

    
9088
    if remote_node is None:
9089
      self.remote_node_info = None
9090
    else:
9091
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9092
             "Remote node '%s' is not locked" % remote_node
9093

    
9094
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9095
      assert self.remote_node_info is not None, \
9096
        "Cannot retrieve locked node %s" % remote_node
9097

    
9098
    if remote_node == self.instance.primary_node:
9099
      raise errors.OpPrereqError("The specified node is the primary node of"
9100
                                 " the instance", errors.ECODE_INVAL)
9101

    
9102
    if remote_node == secondary_node:
9103
      raise errors.OpPrereqError("The specified node is already the"
9104
                                 " secondary node of the instance",
9105
                                 errors.ECODE_INVAL)
9106

    
9107
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9108
                                    constants.REPLACE_DISK_CHG):
9109
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9110
                                 errors.ECODE_INVAL)
9111

    
9112
    if self.mode == constants.REPLACE_DISK_AUTO:
9113
      if not self._CheckDisksActivated(instance):
9114
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9115
                                   " first" % self.instance_name,
9116
                                   errors.ECODE_STATE)
9117
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9118
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9119

    
9120
      if faulty_primary and faulty_secondary:
9121
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9122
                                   " one node and can not be repaired"
9123
                                   " automatically" % self.instance_name,
9124
                                   errors.ECODE_STATE)
9125

    
9126
      if faulty_primary:
9127
        self.disks = faulty_primary
9128
        self.target_node = instance.primary_node
9129
        self.other_node = secondary_node
9130
        check_nodes = [self.target_node, self.other_node]
9131
      elif faulty_secondary:
9132
        self.disks = faulty_secondary
9133
        self.target_node = secondary_node
9134
        self.other_node = instance.primary_node
9135
        check_nodes = [self.target_node, self.other_node]
9136
      else:
9137
        self.disks = []
9138
        check_nodes = []
9139

    
9140
    else:
9141
      # Non-automatic modes
9142
      if self.mode == constants.REPLACE_DISK_PRI:
9143
        self.target_node = instance.primary_node
9144
        self.other_node = secondary_node
9145
        check_nodes = [self.target_node, self.other_node]
9146

    
9147
      elif self.mode == constants.REPLACE_DISK_SEC:
9148
        self.target_node = secondary_node
9149
        self.other_node = instance.primary_node
9150
        check_nodes = [self.target_node, self.other_node]
9151

    
9152
      elif self.mode == constants.REPLACE_DISK_CHG:
9153
        self.new_node = remote_node
9154
        self.other_node = instance.primary_node
9155
        self.target_node = secondary_node
9156
        check_nodes = [self.new_node, self.other_node]
9157

    
9158
        _CheckNodeNotDrained(self.lu, remote_node)
9159
        _CheckNodeVmCapable(self.lu, remote_node)
9160

    
9161
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9162
        assert old_node_info is not None
9163
        if old_node_info.offline and not self.early_release:
9164
          # doesn't make sense to delay the release
9165
          self.early_release = True
9166
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9167
                          " early-release mode", secondary_node)
9168

    
9169
      else:
9170
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9171
                                     self.mode)
9172

    
9173
      # If not specified all disks should be replaced
9174
      if not self.disks:
9175
        self.disks = range(len(self.instance.disks))
9176

    
9177
    for node in check_nodes:
9178
      _CheckNodeOnline(self.lu, node)
9179

    
9180
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9181
                                                          self.other_node,
9182
                                                          self.target_node]
9183
                              if node_name is not None)
9184

    
9185
    # Release unneeded node locks
9186
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9187

    
9188
    # Release any owned node group
9189
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9190
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9191

    
9192
    # Check whether disks are valid
9193
    for disk_idx in self.disks:
9194
      instance.FindDisk(disk_idx)
9195

    
9196
    # Get secondary node IP addresses
9197
    self.node_secondary_ip = \
9198
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9199
           for node_name in touched_nodes)
9200

    
9201
  def Exec(self, feedback_fn):
9202
    """Execute disk replacement.
9203

9204
    This dispatches the disk replacement to the appropriate handler.
9205

9206
    """
9207
    if self.delay_iallocator:
9208
      self._CheckPrereq2()
9209

    
9210
    if __debug__:
9211
      # Verify owned locks before starting operation
9212
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9213
      assert set(owned_locks) == set(self.node_secondary_ip), \
9214
          ("Incorrect node locks, owning %s, expected %s" %
9215
           (owned_locks, self.node_secondary_ip.keys()))
9216

    
9217
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9218
      assert list(owned_locks) == [self.instance_name], \
9219
          "Instance '%s' not locked" % self.instance_name
9220

    
9221
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9222
          "Should not own any node group lock at this point"
9223

    
9224
    if not self.disks:
9225
      feedback_fn("No disks need replacement")
9226
      return
9227

    
9228
    feedback_fn("Replacing disk(s) %s for %s" %
9229
                (utils.CommaJoin(self.disks), self.instance.name))
9230

    
9231
    activate_disks = (not self.instance.admin_up)
9232

    
9233
    # Activate the instance disks if we're replacing them on a down instance
9234
    if activate_disks:
9235
      _StartInstanceDisks(self.lu, self.instance, True)
9236

    
9237
    try:
9238
      # Should we replace the secondary node?
9239
      if self.new_node is not None:
9240
        fn = self._ExecDrbd8Secondary
9241
      else:
9242
        fn = self._ExecDrbd8DiskOnly
9243

    
9244
      result = fn(feedback_fn)
9245
    finally:
9246
      # Deactivate the instance disks if we're replacing them on a
9247
      # down instance
9248
      if activate_disks:
9249
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9250

    
9251
    if __debug__:
9252
      # Verify owned locks
9253
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9254
      nodes = frozenset(self.node_secondary_ip)
9255
      assert ((self.early_release and not owned_locks) or
9256
              (not self.early_release and not (set(owned_locks) - nodes))), \
9257
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9258
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9259

    
9260
    return result
9261

    
9262
  def _CheckVolumeGroup(self, nodes):
9263
    self.lu.LogInfo("Checking volume groups")
9264

    
9265
    vgname = self.cfg.GetVGName()
9266

    
9267
    # Make sure volume group exists on all involved nodes
9268
    results = self.rpc.call_vg_list(nodes)
9269
    if not results:
9270
      raise errors.OpExecError("Can't list volume groups on the nodes")
9271

    
9272
    for node in nodes:
9273
      res = results[node]
9274
      res.Raise("Error checking node %s" % node)
9275
      if vgname not in res.payload:
9276
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9277
                                 (vgname, node))
9278

    
9279
  def _CheckDisksExistence(self, nodes):
9280
    # Check disk existence
9281
    for idx, dev in enumerate(self.instance.disks):
9282
      if idx not in self.disks:
9283
        continue
9284

    
9285
      for node in nodes:
9286
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9287
        self.cfg.SetDiskID(dev, node)
9288

    
9289
        result = self.rpc.call_blockdev_find(node, dev)
9290

    
9291
        msg = result.fail_msg
9292
        if msg or not result.payload:
9293
          if not msg:
9294
            msg = "disk not found"
9295
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9296
                                   (idx, node, msg))
9297

    
9298
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9299
    for idx, dev in enumerate(self.instance.disks):
9300
      if idx not in self.disks:
9301
        continue
9302

    
9303
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9304
                      (idx, node_name))
9305

    
9306
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9307
                                   ldisk=ldisk):
9308
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9309
                                 " replace disks for instance %s" %
9310
                                 (node_name, self.instance.name))
9311

    
9312
  def _CreateNewStorage(self, node_name):
9313
    iv_names = {}
9314

    
9315
    for idx, dev in enumerate(self.instance.disks):
9316
      if idx not in self.disks:
9317
        continue
9318

    
9319
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9320

    
9321
      self.cfg.SetDiskID(dev, node_name)
9322

    
9323
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9324
      names = _GenerateUniqueNames(self.lu, lv_names)
9325

    
9326
      vg_data = dev.children[0].logical_id[0]
9327
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9328
                             logical_id=(vg_data, names[0]))
9329
      vg_meta = dev.children[1].logical_id[0]
9330
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9331
                             logical_id=(vg_meta, names[1]))
9332

    
9333
      new_lvs = [lv_data, lv_meta]
9334
      old_lvs = dev.children
9335
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9336

    
9337
      # we pass force_create=True to force the LVM creation
9338
      for new_lv in new_lvs:
9339
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9340
                        _GetInstanceInfoText(self.instance), False)
9341

    
9342
    return iv_names
9343

    
9344
  def _CheckDevices(self, node_name, iv_names):
9345
    for name, (dev, _, _) in iv_names.iteritems():
9346
      self.cfg.SetDiskID(dev, node_name)
9347

    
9348
      result = self.rpc.call_blockdev_find(node_name, dev)
9349

    
9350
      msg = result.fail_msg
9351
      if msg or not result.payload:
9352
        if not msg:
9353
          msg = "disk not found"
9354
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9355
                                 (name, msg))
9356

    
9357
      if result.payload.is_degraded:
9358
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9359

    
9360
  def _RemoveOldStorage(self, node_name, iv_names):
9361
    for name, (_, old_lvs, _) in iv_names.iteritems():
9362
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9363

    
9364
      for lv in old_lvs:
9365
        self.cfg.SetDiskID(lv, node_name)
9366

    
9367
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9368
        if msg:
9369
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9370
                             hint="remove unused LVs manually")
9371

    
9372
  def _ExecDrbd8DiskOnly(self, feedback_fn):
9373
    """Replace a disk on the primary or secondary for DRBD 8.
9374

9375
    The algorithm for replace is quite complicated:
9376

9377
      1. for each disk to be replaced:
9378

9379
        1. create new LVs on the target node with unique names
9380
        1. detach old LVs from the drbd device
9381
        1. rename old LVs to name_replaced.<time_t>
9382
        1. rename new LVs to old LVs
9383
        1. attach the new LVs (with the old names now) to the drbd device
9384

9385
      1. wait for sync across all devices
9386

9387
      1. for each modified disk:
9388

9389
        1. remove old LVs (which have the name name_replaced.<time_t>)
9390

9391
    Failures are not very well handled.
9392

9393
    """
9394
    steps_total = 6
9395

    
9396
    # Step: check device activation
9397
    self.lu.LogStep(1, steps_total, "Check device existence")
9398
    self._CheckDisksExistence([self.other_node, self.target_node])
9399
    self._CheckVolumeGroup([self.target_node, self.other_node])
9400

    
9401
    # Step: check other node consistency
9402
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9403
    self._CheckDisksConsistency(self.other_node,
9404
                                self.other_node == self.instance.primary_node,
9405
                                False)
9406

    
9407
    # Step: create new storage
9408
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9409
    iv_names = self._CreateNewStorage(self.target_node)
9410

    
9411
    # Step: for each lv, detach+rename*2+attach
9412
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9413
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9414
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9415

    
9416
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9417
                                                     old_lvs)
9418
      result.Raise("Can't detach drbd from local storage on node"
9419
                   " %s for device %s" % (self.target_node, dev.iv_name))
9420
      #dev.children = []
9421
      #cfg.Update(instance)
9422

    
9423
      # ok, we created the new LVs, so now we know we have the needed
9424
      # storage; as such, we proceed on the target node to rename
9425
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9426
      # using the assumption that logical_id == physical_id (which in
9427
      # turn is the unique_id on that node)
9428

    
9429
      # FIXME(iustin): use a better name for the replaced LVs
9430
      temp_suffix = int(time.time())
9431
      ren_fn = lambda d, suff: (d.physical_id[0],
9432
                                d.physical_id[1] + "_replaced-%s" % suff)
      # Build the rename list based on what LVs exist on the node
9435
      rename_old_to_new = []
9436
      for to_ren in old_lvs:
9437
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9438
        if not result.fail_msg and result.payload:
9439
          # device exists
9440
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9441

    
9442
      self.lu.LogInfo("Renaming the old LVs on the target node")
9443
      result = self.rpc.call_blockdev_rename(self.target_node,
9444
                                             rename_old_to_new)
9445
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9446

    
9447
      # Now we rename the new LVs to the old LVs
9448
      self.lu.LogInfo("Renaming the new LVs on the target node")
9449
      rename_new_to_old = [(new, old.physical_id)
9450
                           for old, new in zip(old_lvs, new_lvs)]
9451
      result = self.rpc.call_blockdev_rename(self.target_node,
9452
                                             rename_new_to_old)
9453
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9454

    
9455
      for old, new in zip(old_lvs, new_lvs):
9456
        new.logical_id = old.logical_id
9457
        self.cfg.SetDiskID(new, self.target_node)
9458

    
9459
      for disk in old_lvs:
9460
        disk.logical_id = ren_fn(disk, temp_suffix)
9461
        self.cfg.SetDiskID(disk, self.target_node)
9462

    
9463
      # Now that the new lvs have the old name, we can add them to the device
9464
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9465
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9466
                                                  new_lvs)
9467
      msg = result.fail_msg
9468
      if msg:
9469
        for new_lv in new_lvs:
9470
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9471
                                               new_lv).fail_msg
9472
          if msg2:
9473
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9474
                               hint=("cleanup manually the unused logical"
9475
                                     "volumes"))
9476
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9477

    
9478
      dev.children = new_lvs
9479

    
9480
      self.cfg.Update(self.instance, feedback_fn)
9481

    
9482
    cstep = 5
9483
    if self.early_release:
9484
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9485
      cstep += 1
9486
      self._RemoveOldStorage(self.target_node, iv_names)
9487
      # WARNING: we release both node locks here, do not do other RPCs
9488
      # than WaitForSync to the primary node
9489
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9490
                    names=[self.target_node, self.other_node])
9491

    
9492
    # Wait for sync
9493
    # This can fail as the old devices are degraded and _WaitForSync
9494
    # does a combined result over all disks, so we don't check its return value
9495
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9496
    cstep += 1
9497
    _WaitForSync(self.lu, self.instance)
9498

    
9499
    # Check all devices manually
9500
    self._CheckDevices(self.instance.primary_node, iv_names)
9501

    
9502
    # Step: remove old storage
9503
    if not self.early_release:
9504
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9505
      cstep += 1
9506
      self._RemoveOldStorage(self.target_node, iv_names)
9507

    
9508
  def _ExecDrbd8Secondary(self, feedback_fn):
9509
    """Replace the secondary node for DRBD 8.
9510

9511
    The algorithm for replace is quite complicated:
9512
      - for all disks of the instance:
9513
        - create new LVs on the new node with same names
9514
        - shutdown the drbd device on the old secondary
9515
        - disconnect the drbd network on the primary
9516
        - create the drbd device on the new secondary
9517
        - network attach the drbd on the primary, using an artifice:
9518
          the drbd code for Attach() will connect to the network if it
9519
          finds a device which is connected to the good local disks but
9520
          not network enabled
9521
      - wait for sync across all devices
9522
      - remove all disks from the old secondary
9523

9524
    Failures are not very well handled.
9525

9526
    """
9527
    steps_total = 6
9528

    
9529
    # Step: check device activation
9530
    self.lu.LogStep(1, steps_total, "Check device existence")
9531
    self._CheckDisksExistence([self.instance.primary_node])
9532
    self._CheckVolumeGroup([self.instance.primary_node])
9533

    
9534
    # Step: check other node consistency
9535
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9536
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9537

    
9538
    # Step: create new storage
9539
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9540
    for idx, dev in enumerate(self.instance.disks):
9541
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9542
                      (self.new_node, idx))
9543
      # we pass force_create=True to force LVM creation
9544
      for new_lv in dev.children:
9545
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9546
                        _GetInstanceInfoText(self.instance), False)
9547

    
9548
    # Step 4: drbd minors and drbd setup changes
9549
    # after this, we must manually remove the drbd minors on both the
9550
    # error and the success paths
9551
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9552
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9553
                                         for dev in self.instance.disks],
9554
                                        self.instance.name)
9555
    logging.debug("Allocated minors %r", minors)
9556

    
9557
    iv_names = {}
9558
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9559
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9560
                      (self.new_node, idx))
9561
      # create new devices on new_node; note that we create two IDs:
9562
      # one without port, so the drbd will be activated without
9563
      # networking information on the new node at this stage, and one
9564
      # with network, for the latter activation in step 4
9565
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9566
      if self.instance.primary_node == o_node1:
9567
        p_minor = o_minor1
9568
      else:
9569
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9570
        p_minor = o_minor2
9571

    
9572
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9573
                      p_minor, new_minor, o_secret)
9574
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9575
                    p_minor, new_minor, o_secret)
9576

    
9577
      iv_names[idx] = (dev, dev.children, new_net_id)
9578
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9579
                    new_net_id)
9580
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9581
                              logical_id=new_alone_id,
9582
                              children=dev.children,
9583
                              size=dev.size)
9584
      try:
9585
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9586
                              _GetInstanceInfoText(self.instance), False)
9587
      except errors.GenericError:
9588
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9589
        raise
9590

    
9591
    # We have new devices, shutdown the drbd on the old secondary
9592
    for idx, dev in enumerate(self.instance.disks):
9593
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9594
      self.cfg.SetDiskID(dev, self.target_node)
9595
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9596
      if msg:
9597
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9598
                           "node: %s" % (idx, msg),
9599
                           hint=("Please cleanup this device manually as"
9600
                                 " soon as possible"))
9601

    
9602
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9603
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9604
                                               self.node_secondary_ip,
9605
                                               self.instance.disks)\
9606
                                              [self.instance.primary_node]
9607

    
9608
    msg = result.fail_msg
9609
    if msg:
9610
      # detaches didn't succeed (unlikely)
9611
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9612
      raise errors.OpExecError("Can't detach the disks from the network on"
9613
                               " old node: %s" % (msg,))
9614

    
9615
    # if we managed to detach at least one, we update all the disks of
9616
    # the instance to point to the new secondary
9617
    self.lu.LogInfo("Updating instance configuration")
9618
    for dev, _, new_logical_id in iv_names.itervalues():
9619
      dev.logical_id = new_logical_id
9620
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9621

    
9622
    self.cfg.Update(self.instance, feedback_fn)
9623

    
9624
    # and now perform the drbd attach
9625
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9626
                    " (standalone => connected)")
9627
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9628
                                            self.new_node],
9629
                                           self.node_secondary_ip,
9630
                                           self.instance.disks,
9631
                                           self.instance.name,
9632
                                           False)
9633
    for to_node, to_result in result.items():
9634
      msg = to_result.fail_msg
9635
      if msg:
9636
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9637
                           to_node, msg,
9638
                           hint=("please do a gnt-instance info to see the"
9639
                                 " status of disks"))
9640
    cstep = 5
9641
    if self.early_release:
9642
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9643
      cstep += 1
9644
      self._RemoveOldStorage(self.target_node, iv_names)
9645
      # WARNING: we release all node locks here, do not do other RPCs
9646
      # than WaitForSync to the primary node
9647
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9648
                    names=[self.instance.primary_node,
9649
                           self.target_node,
9650
                           self.new_node])
9651

    
9652
    # Wait for sync
9653
    # This can fail as the old devices are degraded and _WaitForSync
9654
    # does a combined result over all disks, so we don't check its return value
9655
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9656
    cstep += 1
9657
    _WaitForSync(self.lu, self.instance)
9658

    
9659
    # Check all devices manually
9660
    self._CheckDevices(self.instance.primary_node, iv_names)
9661

    
9662
    # Step: remove old storage
9663
    if not self.early_release:
9664
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9665
      self._RemoveOldStorage(self.target_node, iv_names)
9666

    
9667

    
9668
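# Illustrative note for the DRBD secondary replacement above
# (TLReplaceDisks._ExecDrbd8Secondary): it is normally reached through
# OpInstanceReplaceDisks; an invocation from the standard CLI would look
# roughly like the following (node and instance names are placeholders,
# option spelling may vary between versions):
#
#   gnt-instance replace-disks --new-secondary node3.example.com inst1
#   gnt-instance replace-disks -I hail inst1   # let an iallocator choose
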
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


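# Illustrative example for the storage-repair LU above (LURepairNodeStorage):
# it is driven by OpRepairNodeStorage; with the standard CLI an lvm-vg repair
# would be requested roughly as follows (node and VG names are placeholders):
#
#   gnt-node repair-storage node2.example.com lvm-vg xenvg
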
class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    instances = []
    for node in self.op.nodes:
      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
    if not instances:
      return []

    if self.op.remote_node is not None:
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


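# Illustrative note for LUNodeEvacStrategy above: the LU only computes an
# evacuation plan, it does not move anything. With an explicit remote node the
# plan is built locally, otherwise the requested iallocator is asked. The
# result is a list of [instance_name, new_secondary_node] pairs, e.g.
# (placeholder names):
#
#   [["inst1.example.com", "node4.example.com"],
#    ["inst2.example.com", "node4.example.com"]]
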
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


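# Illustrative usage of the disk-grow LU above (LUInstanceGrowDisk): it is
# reached through OpInstanceGrowDisk; with the standard CLI the request would
# look roughly like this (instance name and size are placeholders):
#
#   gnt-instance grow-disk inst1.example.com 0 2g
#
# The grow is first attempted in dry-run mode on all nodes and only then for
# real, which is why call_blockdev_grow is issued twice per node above.
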
class LUInstanceQueryData(NoHooksLU):
9889
  """Query runtime instance data.
9890

9891
  """
9892
  REQ_BGL = False
9893

    
9894
  def ExpandNames(self):
9895
    self.needed_locks = {}
9896

    
9897
    # Use locking if requested or when non-static information is wanted
9898
    if not (self.op.static or self.op.use_locking):
9899
      self.LogWarning("Non-static data requested, locks need to be acquired")
9900
      self.op.use_locking = True
9901

    
9902
    if self.op.instances or not self.op.use_locking:
9903
      # Expand instance names right here
9904
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
9905
    else:
9906
      # Will use acquired locks
9907
      self.wanted_names = None
9908

    
9909
    if self.op.use_locking:
9910
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9911

    
9912
      if self.wanted_names is None:
9913
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9914
      else:
9915
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9916

    
9917
      self.needed_locks[locking.LEVEL_NODE] = []
9918
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9919
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9920

    
9921
  def DeclareLocks(self, level):
9922
    if self.op.use_locking and level == locking.LEVEL_NODE:
9923
      self._LockInstancesNodes()
9924

    
9925
  def CheckPrereq(self):
9926
    """Check prerequisites.
9927

9928
    This only checks the optional instance list against the existing names.
9929

9930
    """
9931
    if self.wanted_names is None:
9932
      assert self.op.use_locking, "Locking was not used"
9933
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9934

    
9935
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9936
                             for name in self.wanted_names]
9937

    
9938
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
9939
    """Returns the status of a block device
9940

9941
    """
9942
    if self.op.static or not node:
9943
      return None
9944

    
9945
    self.cfg.SetDiskID(dev, node)
9946

    
9947
    result = self.rpc.call_blockdev_find(node, dev)
9948
    if result.offline:
9949
      return None
9950

    
9951
    result.Raise("Can't compute disk status for %s" % instance_name)
9952

    
9953
    status = result.payload
9954
    if status is None:
9955
      return None
9956

    
9957
    return (status.dev_path, status.major, status.minor,
9958
            status.sync_percent, status.estimated_time,
9959
            status.is_degraded, status.ldisk_status)
9960

    
9961
  def _ComputeDiskStatus(self, instance, snode, dev):
9962
    """Compute block device status.
9963

9964
    """
9965
    if dev.dev_type in constants.LDS_DRBD:
9966
      # we change the snode then (otherwise we use the one passed in)
9967
      if dev.logical_id[0] == instance.primary_node:
9968
        snode = dev.logical_id[1]
9969
      else:
9970
        snode = dev.logical_id[0]
9971

    
9972
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9973
                                              instance.name, dev)
9974
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9975

    
9976
    if dev.children:
9977
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
9978
                      for child in dev.children]
9979
    else:
9980
      dev_children = []
9981

    
9982
    return {
9983
      "iv_name": dev.iv_name,
9984
      "dev_type": dev.dev_type,
9985
      "logical_id": dev.logical_id,
9986
      "physical_id": dev.physical_id,
9987
      "pstatus": dev_pstatus,
9988
      "sstatus": dev_sstatus,
9989
      "children": dev_children,
9990
      "mode": dev.mode,
9991
      "size": dev.size,
9992
      }
9993

    
9994
  def Exec(self, feedback_fn):
9995
    """Gather and return data"""
9996
    result = {}
9997

    
9998
    cluster = self.cfg.GetClusterInfo()
9999

    
10000
    for instance in self.wanted_instances:
10001
      if not self.op.static:
10002
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10003
                                                  instance.name,
10004
                                                  instance.hypervisor)
10005
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10006
        remote_info = remote_info.payload
10007
        if remote_info and "state" in remote_info:
10008
          remote_state = "up"
10009
        else:
10010
          remote_state = "down"
10011
      else:
10012
        remote_state = None
10013
      if instance.admin_up:
10014
        config_state = "up"
10015
      else:
10016
        config_state = "down"
10017

    
10018
      disks = [self._ComputeDiskStatus(instance, None, device)
10019
               for device in instance.disks]
10020

    
10021
      result[instance.name] = {
10022
        "name": instance.name,
10023
        "config_state": config_state,
10024
        "run_state": remote_state,
10025
        "pnode": instance.primary_node,
10026
        "snodes": instance.secondary_nodes,
10027
        "os": instance.os,
10028
        # this happens to be the same format used for hooks
10029
        "nics": _NICListToTuple(self, instance.nics),
10030
        "disk_template": instance.disk_template,
10031
        "disks": disks,
10032
        "hypervisor": instance.hypervisor,
10033
        "network_port": instance.network_port,
10034
        "hv_instance": instance.hvparams,
10035
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10036
        "be_instance": instance.beparams,
10037
        "be_actual": cluster.FillBE(instance),
10038
        "os_instance": instance.osparams,
10039
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10040
        "serial_no": instance.serial_no,
10041
        "mtime": instance.mtime,
10042
        "ctime": instance.ctime,
10043
        "uuid": instance.uuid,
10044
        }
10045

    
10046
    return result
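  # Illustrative note for the Exec result above: the per-instance dicts back
  # the runtime view offered by tools such as "gnt-instance info"; with
  # static=True the RPC to the primary node is skipped and "run_state" is
  # reported as None.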
10047

    
10048

    
10049
class LUInstanceSetParams(LogicalUnit):
10050
  """Modifies an instances's parameters.
10051

10052
  """
10053
  HPATH = "instance-modify"
10054
  HTYPE = constants.HTYPE_INSTANCE
10055
  REQ_BGL = False
10056

    
10057
  def CheckArguments(self):
10058
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10059
            self.op.hvparams or self.op.beparams or self.op.os_name):
10060
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10061

    
10062
    if self.op.hvparams:
10063
      _CheckGlobalHvParams(self.op.hvparams)
10064

    
10065
    # Disk validation
10066
    disk_addremove = 0
10067
    for disk_op, disk_dict in self.op.disks:
10068
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10069
      if disk_op == constants.DDM_REMOVE:
10070
        disk_addremove += 1
10071
        continue
10072
      elif disk_op == constants.DDM_ADD:
10073
        disk_addremove += 1
10074
      else:
10075
        if not isinstance(disk_op, int):
10076
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10077
        if not isinstance(disk_dict, dict):
10078
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10079
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10080

    
10081
      if disk_op == constants.DDM_ADD:
10082
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10083
        if mode not in constants.DISK_ACCESS_SET:
10084
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10085
                                     errors.ECODE_INVAL)
10086
        size = disk_dict.get(constants.IDISK_SIZE, None)
10087
        if size is None:
10088
          raise errors.OpPrereqError("Required disk parameter size missing",
10089
                                     errors.ECODE_INVAL)
10090
        try:
10091
          size = int(size)
10092
        except (TypeError, ValueError), err:
10093
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10094
                                     str(err), errors.ECODE_INVAL)
10095
        disk_dict[constants.IDISK_SIZE] = size
10096
      else:
10097
        # modification of disk
10098
        if constants.IDISK_SIZE in disk_dict:
10099
          raise errors.OpPrereqError("Disk size change not possible, use"
10100
                                     " grow-disk", errors.ECODE_INVAL)
10101

    
10102
    if disk_addremove > 1:
10103
      raise errors.OpPrereqError("Only one disk add or remove operation"
10104
                                 " supported at a time", errors.ECODE_INVAL)
10105

    
10106
    if self.op.disks and self.op.disk_template is not None:
10107
      raise errors.OpPrereqError("Disk template conversion and other disk"
10108
                                 " changes not supported at the same time",
10109
                                 errors.ECODE_INVAL)
10110

    
10111
    if (self.op.disk_template and
10112
        self.op.disk_template in constants.DTS_INT_MIRROR and
10113
        self.op.remote_node is None):
10114
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10115
                                 " one requires specifying a secondary node",
10116
                                 errors.ECODE_INVAL)
10117

    
10118
    # NIC validation
10119
    nic_addremove = 0
10120
    for nic_op, nic_dict in self.op.nics:
10121
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10122
      if nic_op == constants.DDM_REMOVE:
10123
        nic_addremove += 1
10124
        continue
10125
      elif nic_op == constants.DDM_ADD:
10126
        nic_addremove += 1
10127
      else:
10128
        if not isinstance(nic_op, int):
10129
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10130
        if not isinstance(nic_dict, dict):
10131
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10132
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10133

    
10134
      # nic_dict should be a dict
10135
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10136
      if nic_ip is not None:
10137
        if nic_ip.lower() == constants.VALUE_NONE:
10138
          nic_dict[constants.INIC_IP] = None
10139
        else:
10140
          if not netutils.IPAddress.IsValid(nic_ip):
10141
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10142
                                       errors.ECODE_INVAL)
10143

    
10144
      nic_bridge = nic_dict.get('bridge', None)
10145
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10146
      if nic_bridge and nic_link:
10147
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10148
                                   " at the same time", errors.ECODE_INVAL)
10149
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10150
        nic_dict['bridge'] = None
10151
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10152
        nic_dict[constants.INIC_LINK] = None
10153

    
10154
      if nic_op == constants.DDM_ADD:
10155
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10156
        if nic_mac is None:
10157
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10158

    
10159
      if constants.INIC_MAC in nic_dict:
10160
        nic_mac = nic_dict[constants.INIC_MAC]
10161
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10162
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10163

    
10164
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10165
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10166
                                     " modifying an existing nic",
10167
                                     errors.ECODE_INVAL)
10168

    
10169
    if nic_addremove > 1:
10170
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10171
                                 " supported at a time", errors.ECODE_INVAL)
10172

    
10173
  def ExpandNames(self):
10174
    self._ExpandAndLockInstance()
10175
    self.needed_locks[locking.LEVEL_NODE] = []
10176
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10177

    
10178
  def DeclareLocks(self, level):
10179
    if level == locking.LEVEL_NODE:
10180
      self._LockInstancesNodes()
10181
      if self.op.disk_template and self.op.remote_node:
10182
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10183
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10184

    
10185
  def BuildHooksEnv(self):
10186
    """Build hooks env.
10187

10188
    This runs on the master, primary and secondaries.
10189

10190
    """
10191
    args = dict()
10192
    if constants.BE_MEMORY in self.be_new:
10193
      args['memory'] = self.be_new[constants.BE_MEMORY]
10194
    if constants.BE_VCPUS in self.be_new:
10195
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
10196
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10197
    # information at all.
10198
    if self.op.nics:
10199
      args['nics'] = []
10200
      nic_override = dict(self.op.nics)
10201
      for idx, nic in enumerate(self.instance.nics):
10202
        if idx in nic_override:
10203
          this_nic_override = nic_override[idx]
10204
        else:
10205
          this_nic_override = {}
10206
        if constants.INIC_IP in this_nic_override:
10207
          ip = this_nic_override[constants.INIC_IP]
10208
        else:
10209
          ip = nic.ip
10210
        if constants.INIC_MAC in this_nic_override:
10211
          mac = this_nic_override[constants.INIC_MAC]
10212
        else:
10213
          mac = nic.mac
10214
        if idx in self.nic_pnew:
10215
          nicparams = self.nic_pnew[idx]
10216
        else:
10217
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10218
        mode = nicparams[constants.NIC_MODE]
10219
        link = nicparams[constants.NIC_LINK]
10220
        args['nics'].append((ip, mac, mode, link))
10221
      if constants.DDM_ADD in nic_override:
10222
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10223
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10224
        nicparams = self.nic_pnew[constants.DDM_ADD]
10225
        mode = nicparams[constants.NIC_MODE]
10226
        link = nicparams[constants.NIC_LINK]
10227
        args['nics'].append((ip, mac, mode, link))
10228
      elif constants.DDM_REMOVE in nic_override:
10229
        del args['nics'][-1]
10230

    
10231
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10232
    if self.op.disk_template:
10233
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10234

    
10235
    return env
10236

    
10237
  def BuildHooksNodes(self):
10238
    """Build hooks nodes.
10239

10240
    """
10241
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10242
    return (nl, nl)
10243

    
10244
  def CheckPrereq(self):
10245
    """Check prerequisites.
10246

10247
    This only checks the instance list against the existing names.
10248

10249
    """
10250
    # checking the new params on the primary/secondary nodes
10251

    
10252
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10253
    cluster = self.cluster = self.cfg.GetClusterInfo()
10254
    assert self.instance is not None, \
10255
      "Cannot retrieve locked instance %s" % self.op.instance_name
10256
    pnode = instance.primary_node
10257
    nodelist = list(instance.all_nodes)
10258

    
10259
    # OS change
10260
    if self.op.os_name and not self.op.force:
10261
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10262
                      self.op.force_variant)
10263
      instance_os = self.op.os_name
10264
    else:
10265
      instance_os = instance.os
10266

    
10267
    if self.op.disk_template:
10268
      if instance.disk_template == self.op.disk_template:
10269
        raise errors.OpPrereqError("Instance already has disk template %s" %
10270
                                   instance.disk_template, errors.ECODE_INVAL)
10271

    
10272
      if (instance.disk_template,
10273
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10274
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10275
                                   " %s to %s" % (instance.disk_template,
10276
                                                  self.op.disk_template),
10277
                                   errors.ECODE_INVAL)
10278
      _CheckInstanceDown(self, instance, "cannot change disk template")
10279
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10280
        if self.op.remote_node == pnode:
10281
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10282
                                     " as the primary node of the instance" %
10283
                                     self.op.remote_node, errors.ECODE_STATE)
10284
        _CheckNodeOnline(self, self.op.remote_node)
10285
        _CheckNodeNotDrained(self, self.op.remote_node)
10286
        # FIXME: here we assume that the old instance type is DT_PLAIN
10287
        assert instance.disk_template == constants.DT_PLAIN
10288
        disks = [{constants.IDISK_SIZE: d.size,
10289
                  constants.IDISK_VG: d.logical_id[0]}
10290
                 for d in instance.disks]
10291
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10292
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10293

    
10294
    # hvparams processing
10295
    if self.op.hvparams:
10296
      hv_type = instance.hypervisor
10297
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10298
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10299
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10300

    
10301
      # local check
10302
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10303
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10304
      self.hv_new = hv_new # the new actual values
10305
      self.hv_inst = i_hvdict # the new dict (without defaults)
10306
    else:
10307
      self.hv_new = self.hv_inst = {}
10308

    
10309
    # beparams processing
10310
    if self.op.beparams:
10311
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10312
                                   use_none=True)
10313
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10314
      be_new = cluster.SimpleFillBE(i_bedict)
10315
      self.be_new = be_new # the new actual values
10316
      self.be_inst = i_bedict # the new dict (without defaults)
10317
    else:
10318
      self.be_new = self.be_inst = {}
10319
    be_old = cluster.FillBE(instance)
10320

    
10321
    # osparams processing
10322
    if self.op.osparams:
10323
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10324
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10325
      self.os_inst = i_osdict # the new dict (without defaults)
10326
    else:
10327
      self.os_inst = {}
10328

    
10329
    self.warn = []
10330

    
10331
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10332
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10333
      mem_check_list = [pnode]
10334
      if be_new[constants.BE_AUTO_BALANCE]:
10335
        # either we changed auto_balance to yes or it was from before
10336
        mem_check_list.extend(instance.secondary_nodes)
10337
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10338
                                                  instance.hypervisor)
10339
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10340
                                         instance.hypervisor)
10341
      pninfo = nodeinfo[pnode]
10342
      msg = pninfo.fail_msg
10343
      if msg:
10344
        # Assume the primary node is unreachable and go ahead
10345
        self.warn.append("Can't get info from primary node %s: %s" %
10346
                         (pnode,  msg))
10347
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
10348
        self.warn.append("Node data from primary node %s doesn't contain"
10349
                         " free memory information" % pnode)
10350
      elif instance_info.fail_msg:
10351
        self.warn.append("Can't get instance runtime information: %s" %
10352
                        instance_info.fail_msg)
10353
      else:
10354
        if instance_info.payload:
10355
          current_mem = int(instance_info.payload['memory'])
10356
        else:
10357
          # Assume instance not running
10358
          # (there is a slight race condition here, but it's not very probable,
10359
          # and we have no other way to check)
10360
          current_mem = 0
10361
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10362
                    pninfo.payload['memory_free'])
10363
        if miss_mem > 0:
10364
          raise errors.OpPrereqError("This change will prevent the instance"
10365
                                     " from starting, due to %d MB of memory"
10366
                                     " missing on its primary node" % miss_mem,
10367
                                     errors.ECODE_NORES)
10368

    
10369
      if be_new[constants.BE_AUTO_BALANCE]:
10370
        for node, nres in nodeinfo.items():
10371
          if node not in instance.secondary_nodes:
10372
            continue
10373
          nres.Raise("Can't get info from secondary node %s" % node,
10374
                     prereq=True, ecode=errors.ECODE_STATE)
10375
          if not isinstance(nres.payload.get('memory_free', None), int):
10376
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10377
                                       " memory information" % node,
10378
                                       errors.ECODE_STATE)
10379
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10380
            raise errors.OpPrereqError("This change will prevent the instance"
10381
                                       " from failover to its secondary node"
10382
                                       " %s, due to not enough memory" % node,
10383
                                       errors.ECODE_STATE)
10384

    
10385
    # NIC processing
10386
    self.nic_pnew = {}
10387
    self.nic_pinst = {}
10388
    for nic_op, nic_dict in self.op.nics:
10389
      if nic_op == constants.DDM_REMOVE:
10390
        if not instance.nics:
10391
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10392
                                     errors.ECODE_INVAL)
10393
        continue
10394
      if nic_op != constants.DDM_ADD:
10395
        # an existing nic
10396
        if not instance.nics:
10397
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10398
                                     " no NICs" % nic_op,
10399
                                     errors.ECODE_INVAL)
10400
        if nic_op < 0 or nic_op >= len(instance.nics):
10401
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10402
                                     " are 0 to %d" %
10403
                                     (nic_op, len(instance.nics) - 1),
10404
                                     errors.ECODE_INVAL)
10405
        old_nic_params = instance.nics[nic_op].nicparams
10406
        old_nic_ip = instance.nics[nic_op].ip
10407
      else:
10408
        old_nic_params = {}
10409
        old_nic_ip = None
10410

    
10411
      update_params_dict = dict([(key, nic_dict[key])
10412
                                 for key in constants.NICS_PARAMETERS
10413
                                 if key in nic_dict])
10414

    
10415
      if 'bridge' in nic_dict:
10416
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10417

    
10418
      new_nic_params = _GetUpdatedParams(old_nic_params,
10419
                                         update_params_dict)
10420
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10421
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10422
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10423
      self.nic_pinst[nic_op] = new_nic_params
10424
      self.nic_pnew[nic_op] = new_filled_nic_params
10425
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10426

    
10427
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10428
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10429
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10430
        if msg:
10431
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10432
          if self.op.force:
10433
            self.warn.append(msg)
10434
          else:
10435
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10436
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10437
        if constants.INIC_IP in nic_dict:
10438
          nic_ip = nic_dict[constants.INIC_IP]
10439
        else:
10440
          nic_ip = old_nic_ip
10441
        if nic_ip is None:
10442
          raise errors.OpPrereqError('Cannot set the nic ip to None'
10443
                                     ' on a routed nic', errors.ECODE_INVAL)
10444
      if constants.INIC_MAC in nic_dict:
10445
        nic_mac = nic_dict[constants.INIC_MAC]
10446
        if nic_mac is None:
10447
          raise errors.OpPrereqError('Cannot set the nic mac to None',
10448
                                     errors.ECODE_INVAL)
10449
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10450
          # otherwise generate the mac
10451
          nic_dict[constants.INIC_MAC] = \
10452
            self.cfg.GenerateMAC(self.proc.GetECId())
10453
        else:
10454
          # or validate/reserve the current one
10455
          try:
10456
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10457
          except errors.ReservationError:
10458
            raise errors.OpPrereqError("MAC address %s already in use"
10459
                                       " in cluster" % nic_mac,
10460
                                       errors.ECODE_NOTUNIQUE)
10461

    
10462
    # DISK processing
10463
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10464
      raise errors.OpPrereqError("Disk operations not supported for"
10465
                                 " diskless instances",
10466
                                 errors.ECODE_INVAL)
10467
    for disk_op, _ in self.op.disks:
10468
      if disk_op == constants.DDM_REMOVE:
10469
        if len(instance.disks) == 1:
10470
          raise errors.OpPrereqError("Cannot remove the last disk of"
10471
                                     " an instance", errors.ECODE_INVAL)
10472
        _CheckInstanceDown(self, instance, "cannot remove disks")
10473

    
10474
      if (disk_op == constants.DDM_ADD and
10475
          len(instance.disks) >= constants.MAX_DISKS):
10476
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10477
                                   " add more" % constants.MAX_DISKS,
10478
                                   errors.ECODE_STATE)
10479
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10480
        # an existing disk
10481
        if disk_op < 0 or disk_op >= len(instance.disks):
10482
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10483
                                     " are 0 to %d" %
10484
                                     (disk_op, len(instance.disks)),
10485
                                     errors.ECODE_INVAL)
10486

    
10487
    return
10488

    
10489
  def _ConvertPlainToDrbd(self, feedback_fn):
10490
    """Converts an instance from plain to drbd.
10491

10492
    """
10493
    feedback_fn("Converting template to drbd")
10494
    instance = self.instance
10495
    pnode = instance.primary_node
10496
    snode = self.op.remote_node
10497

    
10498
    # create a fake disk info for _GenerateDiskTemplate
10499
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10500
                  constants.IDISK_VG: d.logical_id[0]}
10501
                 for d in instance.disks]
10502
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10503
                                      instance.name, pnode, [snode],
10504
                                      disk_info, None, None, 0, feedback_fn)
10505
    info = _GetInstanceInfoText(instance)
10506
    feedback_fn("Creating aditional volumes...")
10507
    # first, create the missing data and meta devices
10508
    for disk in new_disks:
10509
      # unfortunately this is... not too nice
10510
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10511
                            info, True)
10512
      for child in disk.children:
10513
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10514
    # at this stage, all new LVs have been created, we can rename the
10515
    # old ones
10516
    feedback_fn("Renaming original volumes...")
10517
    rename_list = [(o, n.children[0].logical_id)
10518
                   for (o, n) in zip(instance.disks, new_disks)]
10519
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10520
    result.Raise("Failed to rename original LVs")
10521

    
10522
    feedback_fn("Initializing DRBD devices...")
10523
    # all child devices are in place, we can now create the DRBD devices
10524
    for disk in new_disks:
10525
      for node in [pnode, snode]:
10526
        f_create = node == pnode
10527
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10528

    
10529
    # at this point, the instance has been modified
10530
    instance.disk_template = constants.DT_DRBD8
10531
    instance.disks = new_disks
10532
    self.cfg.Update(instance, feedback_fn)
10533

    
10534
    # disks are created, waiting for sync
10535
    disk_abort = not _WaitForSync(self, instance,
10536
                                  oneshot=not self.op.wait_for_sync)
10537
    if disk_abort:
10538
      raise errors.OpExecError("There are some degraded disks for"
10539
                               " this instance, please cleanup manually")
10540

    
10541
  def _ConvertDrbdToPlain(self, feedback_fn):
10542
    """Converts an instance from drbd to plain.
10543

10544
    """
10545
    instance = self.instance
10546
    assert len(instance.secondary_nodes) == 1
10547
    pnode = instance.primary_node
10548
    snode = instance.secondary_nodes[0]
10549
    feedback_fn("Converting template to plain")
10550

    
10551
    old_disks = instance.disks
10552
    new_disks = [d.children[0] for d in old_disks]
10553

    
10554
    # copy over size and mode
10555
    for parent, child in zip(old_disks, new_disks):
10556
      child.size = parent.size
10557
      child.mode = parent.mode
10558

    
10559
    # update instance structure
10560
    instance.disks = new_disks
10561
    instance.disk_template = constants.DT_PLAIN
10562
    self.cfg.Update(instance, feedback_fn)
10563

    
10564
    feedback_fn("Removing volumes on the secondary node...")
10565
    for disk in old_disks:
10566
      self.cfg.SetDiskID(disk, snode)
10567
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10568
      if msg:
10569
        self.LogWarning("Could not remove block device %s on node %s,"
10570
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10571

    
10572
    feedback_fn("Removing unneeded volumes on the primary node...")
10573
    for idx, disk in enumerate(old_disks):
10574
      meta = disk.children[1]
10575
      self.cfg.SetDiskID(meta, pnode)
10576
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10577
      if msg:
10578
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10579
                        " continuing anyway: %s", idx, pnode, msg)
10580

    
10581
  def Exec(self, feedback_fn):
10582
    """Modifies an instance.
10583

10584
    All parameters take effect only at the next restart of the instance.
10585

10586
    """
10587
    # Process here the warnings from CheckPrereq, as we don't have a
10588
    # feedback_fn there.
10589
    for warn in self.warn:
10590
      feedback_fn("WARNING: %s" % warn)
10591

    
10592
    result = []
10593
    instance = self.instance
10594
    # disk changes
10595
    for disk_op, disk_dict in self.op.disks:
10596
      if disk_op == constants.DDM_REMOVE:
10597
        # remove the last disk
10598
        device = instance.disks.pop()
10599
        device_idx = len(instance.disks)
10600
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10601
          self.cfg.SetDiskID(disk, node)
10602
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10603
          if msg:
10604
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10605
                            " continuing anyway", device_idx, node, msg)
10606
        result.append(("disk/%d" % device_idx, "remove"))
10607
      elif disk_op == constants.DDM_ADD:
10608
        # add a new disk
10609
        if instance.disk_template in (constants.DT_FILE,
10610
                                        constants.DT_SHARED_FILE):
10611
          file_driver, file_path = instance.disks[0].logical_id
10612
          file_path = os.path.dirname(file_path)
10613
        else:
10614
          file_driver = file_path = None
10615
        disk_idx_base = len(instance.disks)
10616
        new_disk = _GenerateDiskTemplate(self,
10617
                                         instance.disk_template,
10618
                                         instance.name, instance.primary_node,
10619
                                         instance.secondary_nodes,
10620
                                         [disk_dict],
10621
                                         file_path,
10622
                                         file_driver,
10623
                                         disk_idx_base, feedback_fn)[0]
10624
        instance.disks.append(new_disk)
10625
        info = _GetInstanceInfoText(instance)
10626

    
10627
        logging.info("Creating volume %s for instance %s",
10628
                     new_disk.iv_name, instance.name)
10629
        # Note: this needs to be kept in sync with _CreateDisks
10630
        #HARDCODE
10631
        for node in instance.all_nodes:
10632
          f_create = node == instance.primary_node
10633
          try:
10634
            _CreateBlockDev(self, node, instance, new_disk,
10635
                            f_create, info, f_create)
10636
          except errors.OpExecError, err:
10637
            self.LogWarning("Failed to create volume %s (%s) on"
10638
                            " node %s: %s",
10639
                            new_disk.iv_name, new_disk, node, err)
10640
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10641
                       (new_disk.size, new_disk.mode)))
10642
      else:
10643
        # change a given disk
10644
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10645
        result.append(("disk.mode/%d" % disk_op,
10646
                       disk_dict[constants.IDISK_MODE]))
10647

    
10648
    if self.op.disk_template:
10649
      r_shut = _ShutdownInstanceDisks(self, instance)
10650
      if not r_shut:
10651
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10652
                                 " proceed with disk template conversion")
10653
      mode = (instance.disk_template, self.op.disk_template)
10654
      try:
10655
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10656
      except:
10657
        self.cfg.ReleaseDRBDMinors(instance.name)
10658
        raise
10659
      result.append(("disk_template", self.op.disk_template))
10660

    
10661
    # NIC changes
10662
    for nic_op, nic_dict in self.op.nics:
10663
      if nic_op == constants.DDM_REMOVE:
10664
        # remove the last nic
10665
        del instance.nics[-1]
10666
        result.append(("nic.%d" % len(instance.nics), "remove"))
10667
      elif nic_op == constants.DDM_ADD:
10668
        # mac and bridge should be set, by now
10669
        mac = nic_dict[constants.INIC_MAC]
10670
        ip = nic_dict.get(constants.INIC_IP, None)
10671
        nicparams = self.nic_pinst[constants.DDM_ADD]
10672
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10673
        instance.nics.append(new_nic)
10674
        result.append(("nic.%d" % (len(instance.nics) - 1),
10675
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10676
                       (new_nic.mac, new_nic.ip,
10677
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10678
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10679
                       )))
10680
      else:
10681
        for key in (constants.INIC_MAC, constants.INIC_IP):
10682
          if key in nic_dict:
10683
            setattr(instance.nics[nic_op], key, nic_dict[key])
10684
        if nic_op in self.nic_pinst:
10685
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10686
        for key, val in nic_dict.iteritems():
10687
          result.append(("nic.%s/%d" % (key, nic_op), val))
10688

    
10689
    # hvparams changes
10690
    if self.op.hvparams:
10691
      instance.hvparams = self.hv_inst
10692
      for key, val in self.op.hvparams.iteritems():
10693
        result.append(("hv/%s" % key, val))
10694

    
10695
    # beparams changes
10696
    if self.op.beparams:
10697
      instance.beparams = self.be_inst
10698
      for key, val in self.op.beparams.iteritems():
10699
        result.append(("be/%s" % key, val))
10700

    
10701
    # OS change
10702
    if self.op.os_name:
10703
      instance.os = self.op.os_name
10704

    
10705
    # osparams changes
10706
    if self.op.osparams:
10707
      instance.osparams = self.os_inst
10708
      for key, val in self.op.osparams.iteritems():
10709
        result.append(("os/%s" % key, val))
10710

    
10711
    self.cfg.Update(instance, feedback_fn)
10712

    
10713
    return result
10714

    
10715
  _DISK_CONVERSIONS = {
10716
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10717
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10718
    }
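  # Note on _DISK_CONVERSIONS above: only plain<->drbd conversions are wired
  # up, which is why CheckPrereq rejects any other template change. An
  # illustrative request via the standard CLI (node name is a placeholder,
  # option spelling may vary between versions):
  #
  #   gnt-instance modify -t drbd -n node2.example.com inst1.example.com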
10719

    
10720

    
10721
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


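# Illustrative note for LUBackupQuery above: the Exec result maps each queried
# node either to the list of exports found on it or to False when the node
# could not be contacted, e.g. (placeholder names):
#
#   {"node1.example.com": ["inst1.example.com"],
#    "node2.example.com": False}
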
class LUBackupPrepare(NoHooksLU):
10758
  """Prepares an instance for an export and returns useful information.
10759

10760
  """
10761
  REQ_BGL = False
10762

    
10763
  def ExpandNames(self):
10764
    self._ExpandAndLockInstance()
10765

    
10766
  def CheckPrereq(self):
10767
    """Check prerequisites.
10768

10769
    """
10770
    instance_name = self.op.instance_name
10771

    
10772
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10773
    assert self.instance is not None, \
10774
          "Cannot retrieve locked instance %s" % self.op.instance_name
10775
    _CheckNodeOnline(self, self.instance.primary_node)
10776

    
10777
    self._cds = _GetClusterDomainSecret()
10778

    
10779
  def Exec(self, feedback_fn):
10780
    """Prepares an instance for an export.
10781

10782
    """
10783
    instance = self.instance
10784

    
10785
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10786
      salt = utils.GenerateSecret(8)
10787

    
10788
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10789
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10790
                                              constants.RIE_CERT_VALIDITY)
10791
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10792

    
10793
      (name, cert_pem) = result.payload
10794

    
10795
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10796
                                             cert_pem)
10797

    
10798
      return {
10799
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10800
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10801
                          salt),
10802
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10803
        }
10804

    
10805
    return None
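    # Local-mode exports need no preparation data, hence the None above; the
    # remote-mode dictionary returned earlier carries the remote import/export
    # handshake, the HMAC-signed X509 key name and the signed CA certificate.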
10806

    
10807

    
10808
class LUBackupExport(LogicalUnit):
10809
  """Export an instance to an image in the cluster.
10810

10811
  """
10812
  HPATH = "instance-export"
10813
  HTYPE = constants.HTYPE_INSTANCE
10814
  REQ_BGL = False
10815

    
10816
  def CheckArguments(self):
10817
    """Check the arguments.
10818

10819
    """
10820
    self.x509_key_name = self.op.x509_key_name
10821
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10822

    
10823
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10824
      if not self.x509_key_name:
10825
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10826
                                   errors.ECODE_INVAL)
10827

    
10828
      if not self.dest_x509_ca_pem:
10829
        raise errors.OpPrereqError("Missing destination X509 CA",
10830
                                   errors.ECODE_INVAL)
10831

    
10832
  def ExpandNames(self):
10833
    self._ExpandAndLockInstance()
10834

    
10835
    # Lock all nodes for local exports
10836
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10837
      # FIXME: lock only instance primary and destination node
10838
      #
10839
      # Sad but true, for now we have to lock all nodes, as we don't know where
10840
      # the previous export might be, and in this LU we search for it and
10841
      # remove it from its current node. In the future we could fix this by:
10842
      #  - making a tasklet to search (share-lock all), then create the
10843
      #    new one, then one to remove, after
10844
      #  - removing the removal operation altogether
10845
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10846

    
10847
  def DeclareLocks(self, level):
10848
    """Last minute lock declaration."""
10849
    # All nodes are locked anyway, so nothing to do here.
10850

    
10851
  def BuildHooksEnv(self):
10852
    """Build hooks env.
10853

10854
    This will run on the master, primary node and target node.
10855

10856
    """
10857
    env = {
10858
      "EXPORT_MODE": self.op.mode,
10859
      "EXPORT_NODE": self.op.target_node,
10860
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10861
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10862
      # TODO: Generic function for boolean env variables
10863
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10864
      }
10865

    
10866
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10867

    
10868
    return env
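    # A typical environment (values purely illustrative) would contain e.g.
    # EXPORT_MODE="local", EXPORT_NODE="node3.example.com",
    # EXPORT_DO_SHUTDOWN=True, SHUTDOWN_TIMEOUT=120 and
    # REMOVE_INSTANCE="False", plus the generic instance variables added by
    # _BuildInstanceHookEnvByObject.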
10869

    
10870
  def BuildHooksNodes(self):
10871
    """Build hooks nodes.
10872

10873
    """
10874
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10875

    
10876
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10877
      nl.append(self.op.target_node)
10878

    
10879
    return (nl, nl)
10880

    
10881
  def CheckPrereq(self):
10882
    """Check prerequisites.
10883

10884
    This checks that the instance and node names are valid.
10885

10886
    """
10887
    instance_name = self.op.instance_name
10888

    
10889
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10890
    assert self.instance is not None, \
10891
          "Cannot retrieve locked instance %s" % self.op.instance_name
10892
    _CheckNodeOnline(self, self.instance.primary_node)
10893

    
10894
    if (self.op.remove_instance and self.instance.admin_up and
10895
        not self.op.shutdown):
10896
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10897
                                 " down before")
10898

    
10899
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10900
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10901
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10902
      assert self.dst_node is not None
10903

    
10904
      _CheckNodeOnline(self, self.dst_node.name)
10905
      _CheckNodeNotDrained(self, self.dst_node.name)
10906

    
10907
      self._cds = None
10908
      self.dest_disk_info = None
10909
      self.dest_x509_ca = None
10910

    
10911
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10912
      self.dst_node = None
10913

    
10914
      if len(self.op.target_node) != len(self.instance.disks):
10915
        raise errors.OpPrereqError(("Received destination information for %s"
10916
                                    " disks, but instance %s has %s disks") %
10917
                                   (len(self.op.target_node), instance_name,
10918
                                    len(self.instance.disks)),
10919
                                   errors.ECODE_INVAL)
10920

    
10921
      cds = _GetClusterDomainSecret()
10922

    
10923
      # Check X509 key name
10924
      try:
10925
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10926
      except (TypeError, ValueError), err:
10927
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10928

    
10929
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10930
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10931
                                   errors.ECODE_INVAL)
10932

    
10933
      # Load and verify CA
10934
      try:
10935
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10936
      except OpenSSL.crypto.Error, err:
10937
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10938
                                   (err, ), errors.ECODE_INVAL)
10939

    
10940
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10941
      if errcode is not None:
10942
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10943
                                   (msg, ), errors.ECODE_INVAL)
10944

    
10945
      self.dest_x509_ca = cert
10946

    
10947
      # Verify target information
10948
      disk_info = []
10949
      for idx, disk_data in enumerate(self.op.target_node):
10950
        try:
10951
          (host, port, magic) = \
10952
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10953
        except errors.GenericError, err:
10954
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10955
                                     (idx, err), errors.ECODE_INVAL)
10956

    
10957
        disk_info.append((host, port, magic))
10958

    
10959
      assert len(disk_info) == len(self.op.target_node)
10960
      self.dest_disk_info = disk_info
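      # Each entry is a verified (host, port, magic) tuple telling the remote
      # export code where to send the corresponding instance disk.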
10961

    
10962
    else:
10963
      raise errors.ProgrammerError("Unhandled export mode %r" %
10964
                                   self.op.mode)
10965

    
10966
    # instance disk type verification
10967
    # TODO: Implement export support for file-based disks
10968
    for disk in self.instance.disks:
10969
      if disk.dev_type == constants.LD_FILE:
10970
        raise errors.OpPrereqError("Export not supported for instances with"
10971
                                   " file-based disks", errors.ECODE_INVAL)
10972

    
10973
  def _CleanupExports(self, feedback_fn):
10974
    """Removes exports of current instance from all other nodes.
10975

10976
    If an instance in a cluster with nodes A..D was exported to node C, its
10977
    exports will be removed from the nodes A, B and D.
10978

10979
    """
10980
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10981

    
10982
    nodelist = self.cfg.GetNodeList()
10983
    nodelist.remove(self.dst_node.name)
10984

    
10985
    # on one-node clusters nodelist will be empty after the removal
10986
    # if we proceeded, the backup would be removed because OpBackupQuery
10987
    # substitutes an empty list with the full cluster node list.
10988
    iname = self.instance.name
10989
    if nodelist:
10990
      feedback_fn("Removing old exports for instance %s" % iname)
10991
      exportlist = self.rpc.call_export_list(nodelist)
10992
      for node in exportlist:
10993
        if exportlist[node].fail_msg:
10994
          continue
10995
        if iname in exportlist[node].payload:
10996
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10997
          if msg:
10998
            self.LogWarning("Could not remove older export for instance %s"
10999
                            " on node %s: %s", iname, node, msg)
11000

    
11001
  def Exec(self, feedback_fn):
11002
    """Export an instance to an image in the cluster.
11003

11004
    """
11005
    assert self.op.mode in constants.EXPORT_MODES
11006

    
11007
    instance = self.instance
11008
    src_node = instance.primary_node
11009

    
11010
    if self.op.shutdown:
11011
      # shutdown the instance, but not the disks
11012
      feedback_fn("Shutting down instance %s" % instance.name)
11013
      result = self.rpc.call_instance_shutdown(src_node, instance,
11014
                                               self.op.shutdown_timeout)
11015
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11016
      result.Raise("Could not shutdown instance %s on"
11017
                   " node %s" % (instance.name, src_node))
11018

    
11019
    # set the disks ID correctly since call_instance_start needs the
11020
    # correct drbd minor to create the symlinks
11021
    for disk in instance.disks:
11022
      self.cfg.SetDiskID(disk, src_node)
11023

    
11024
    activate_disks = (not instance.admin_up)
11025

    
11026
    if activate_disks:
11027
      # Activate the instance disks if we're exporting a stopped instance
11028
      feedback_fn("Activating disks for %s" % instance.name)
11029
      _StartInstanceDisks(self, instance, None)
11030

    
11031
    try:
11032
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11033
                                                     instance)
11034

    
11035
      helper.CreateSnapshots()
11036
      try:
11037
        if (self.op.shutdown and instance.admin_up and
11038
            not self.op.remove_instance):
11039
          assert not activate_disks
11040
          feedback_fn("Starting instance %s" % instance.name)
11041
          result = self.rpc.call_instance_start(src_node, instance, None, None)
11042
          msg = result.fail_msg
11043
          if msg:
11044
            feedback_fn("Failed to start instance: %s" % msg)
11045
            _ShutdownInstanceDisks(self, instance)
11046
            raise errors.OpExecError("Could not start instance: %s" % msg)
11047

    
11048
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11049
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11050
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11051
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11052
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11053

    
11054
          (key_name, _, _) = self.x509_key_name
11055

    
11056
          dest_ca_pem = \
11057
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11058
                                            self.dest_x509_ca)
11059

    
11060
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11061
                                                     key_name, dest_ca_pem,
11062
                                                     timeouts)
11063
      finally:
11064
        helper.Cleanup()
11065

    
11066
      # Check for backwards compatibility
11067
      assert len(dresults) == len(instance.disks)
11068
      assert compat.all(isinstance(i, bool) for i in dresults), \
11069
             "Not all results are boolean: %r" % dresults
11070

    
11071
    finally:
11072
      if activate_disks:
11073
        feedback_fn("Deactivating disks for %s" % instance.name)
11074
        _ShutdownInstanceDisks(self, instance)
11075

    
11076
    if not (compat.all(dresults) and fin_resu):
11077
      failures = []
11078
      if not fin_resu:
11079
        failures.append("export finalization")
11080
      if not compat.all(dresults):
11081
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11082
                               if not dsk)
11083
        failures.append("disk export: disk(s) %s" % fdsk)
11084

    
11085
      raise errors.OpExecError("Export failed, errors in %s" %
11086
                               utils.CommaJoin(failures))
11087

    
11088
    # At this point, the export was successful, we can cleanup/finish
11089

    
11090
    # Remove instance if requested
11091
    if self.op.remove_instance:
11092
      feedback_fn("Removing instance %s" % instance.name)
11093
      _RemoveInstance(self, feedback_fn, instance,
11094
                      self.op.ignore_remove_failures)
11095

    
11096
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11097
      self._CleanupExports(feedback_fn)
11098

    
11099
    return fin_resu, dresults
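    # fin_resu reports whether export finalization succeeded, and dresults
    # holds one boolean per instance disk; e.g. (True, [True, True]) would be
    # a fully successful export of a two-disk instance (illustrative values).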
11100

    
11101

    
11102
class LUBackupRemove(NoHooksLU):
11103
  """Remove exports related to the named instance.
11104

11105
  """
11106
  REQ_BGL = False
11107

    
11108
  def ExpandNames(self):
11109
    self.needed_locks = {}
11110
    # We need all nodes to be locked in order for RemoveExport to work, but we
11111
    # don't need to lock the instance itself, as nothing will happen to it (and
11112
    # we can also remove exports for an already-removed instance)
11113
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11114

    
11115
  def Exec(self, feedback_fn):
11116
    """Remove any export.
11117

11118
    """
11119
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11120
    # If the instance was not found we'll try with the name that was passed in.
11121
    # This will only work if it was an FQDN, though.
11122
    fqdn_warn = False
11123
    if not instance_name:
11124
      fqdn_warn = True
11125
      instance_name = self.op.instance_name
11126

    
11127
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11128
    exportlist = self.rpc.call_export_list(locked_nodes)
11129
    found = False
11130
    for node in exportlist:
11131
      msg = exportlist[node].fail_msg
11132
      if msg:
11133
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11134
        continue
11135
      if instance_name in exportlist[node].payload:
11136
        found = True
11137
        result = self.rpc.call_export_remove(node, instance_name)
11138
        msg = result.fail_msg
11139
        if msg:
11140
          logging.error("Could not remove export for instance %s"
11141
                        " on node %s: %s", instance_name, node, msg)
11142

    
11143
    if fqdn_warn and not found:
11144
      feedback_fn("Export not found. If trying to remove an export belonging"
11145
                  " to a deleted instance please use its Fully Qualified"
11146
                  " Domain Name.")
11147

    
11148

    
11149
class LUGroupAdd(LogicalUnit):
11150
  """Logical unit for creating node groups.
11151

11152
  """
11153
  HPATH = "group-add"
11154
  HTYPE = constants.HTYPE_GROUP
11155
  REQ_BGL = False
11156

    
11157
  def ExpandNames(self):
11158
    # We need the new group's UUID here so that we can create and acquire the
11159
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11160
    # that it should not check whether the UUID exists in the configuration.
11161
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11162
    self.needed_locks = {}
11163
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11164

    
11165
  def CheckPrereq(self):
11166
    """Check prerequisites.
11167

11168
    This checks that the given group name is not an existing node group
11169
    already.
11170

11171
    """
11172
    try:
11173
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11174
    except errors.OpPrereqError:
11175
      pass
11176
    else:
11177
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11178
                                 " node group (UUID: %s)" %
11179
                                 (self.op.group_name, existing_uuid),
11180
                                 errors.ECODE_EXISTS)
11181

    
11182
    if self.op.ndparams:
11183
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11184

    
11185
  def BuildHooksEnv(self):
11186
    """Build hooks env.
11187

11188
    """
11189
    return {
11190
      "GROUP_NAME": self.op.group_name,
11191
      }
11192

    
11193
  def BuildHooksNodes(self):
11194
    """Build hooks nodes.
11195

11196
    """
11197
    mn = self.cfg.GetMasterNode()
11198
    return ([mn], [mn])
11199

    
11200
  def Exec(self, feedback_fn):
11201
    """Add the node group to the cluster.
11202

11203
    """
11204
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11205
                                  uuid=self.group_uuid,
11206
                                  alloc_policy=self.op.alloc_policy,
11207
                                  ndparams=self.op.ndparams)
11208

    
11209
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11210
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11211

    
11212

    
11213
class LUGroupAssignNodes(NoHooksLU):
11214
  """Logical unit for assigning nodes to groups.
11215

11216
  """
11217
  REQ_BGL = False
11218

    
11219
  def ExpandNames(self):
11220
    # These raise errors.OpPrereqError on their own:
11221
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11222
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11223

    
11224
    # We want to lock all the affected nodes and groups. We have readily
11225
    # available the list of nodes, and the *destination* group. To gather the
11226
    # list of "source" groups, we need to fetch node information later on.
11227
    self.needed_locks = {
11228
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11229
      locking.LEVEL_NODE: self.op.nodes,
11230
      }
11231

    
11232
  def DeclareLocks(self, level):
11233
    if level == locking.LEVEL_NODEGROUP:
11234
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11235

    
11236
      # Try to get all affected nodes' groups without having the group or node
11237
      # lock yet. Needs verification later in the code flow.
11238
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11239

    
11240
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11241

    
11242
  def CheckPrereq(self):
11243
    """Check prerequisites.
11244

11245
    """
11246
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11247
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11248
            frozenset(self.op.nodes))
11249

    
11250
    expected_locks = (set([self.group_uuid]) |
11251
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11252
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11253
    if actual_locks != expected_locks:
11254
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11255
                               " current groups are '%s', used to be '%s'" %
11256
                               (utils.CommaJoin(expected_locks),
11257
                                utils.CommaJoin(actual_locks)))
11258

    
11259
    self.node_data = self.cfg.GetAllNodesInfo()
11260
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11261
    instance_data = self.cfg.GetAllInstancesInfo()
11262

    
11263
    if self.group is None:
11264
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11265
                               (self.op.group_name, self.group_uuid))
11266

    
11267
    (new_splits, previous_splits) = \
11268
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11269
                                             for node in self.op.nodes],
11270
                                            self.node_data, instance_data)
11271

    
11272
    if new_splits:
11273
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11274

    
11275
      if not self.op.force:
11276
        raise errors.OpExecError("The following instances get split by this"
11277
                                 " change and --force was not given: %s" %
11278
                                 fmt_new_splits)
11279
      else:
11280
        self.LogWarning("This operation will split the following instances: %s",
11281
                        fmt_new_splits)
11282

    
11283
        if previous_splits:
11284
          self.LogWarning("In addition, these already-split instances continue"
11285
                          " to be split across groups: %s",
11286
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11287

    
11288
  def Exec(self, feedback_fn):
11289
    """Assign nodes to a new group.
11290

11291
    """
11292
    for node in self.op.nodes:
11293
      self.node_data[node].group = self.group_uuid
11294

    
11295
    # FIXME: Depends on side-effects of modifying the result of
11296
    # C{cfg.GetAllNodesInfo}
11297

    
11298
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11299

    
11300
  @staticmethod
11301
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11302
    """Check for split instances after a node assignment.
11303

11304
    This method considers a series of node assignments as an atomic operation,
11305
    and returns information about split instances after applying the set of
11306
    changes.
11307

11308
    In particular, it returns information about newly split instances, and
11309
    instances that were already split, and remain so after the change.
11310

11311
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11312
    considered.
11313

11314
    @type changes: list of (node_name, new_group_uuid) pairs.
11315
    @param changes: list of node assignments to consider.
11316
    @param node_data: a dict with data for all nodes
11317
    @param instance_data: a dict with all instances to consider
11318
    @rtype: a two-tuple
11319
    @return: a list of instances that were previously okay and end up split as a
11320
      consequence of this change, and a list of instances that were previously
11321
      split and this change does not fix.
11322

11323
    """
11324
    changed_nodes = dict((node, group) for node, group in changes
11325
                         if node_data[node].group != group)
11326

    
11327
    all_split_instances = set()
11328
    previously_split_instances = set()
11329

    
11330
    def InstanceNodes(instance):
11331
      return [instance.primary_node] + list(instance.secondary_nodes)
11332

    
11333
    for inst in instance_data.values():
11334
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11335
        continue
11336

    
11337
      instance_nodes = InstanceNodes(inst)
11338

    
11339
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11340
        previously_split_instances.add(inst.name)
11341

    
11342
      if len(set(changed_nodes.get(node, node_data[node].group)
11343
                 for node in instance_nodes)) > 1:
11344
        all_split_instances.add(inst.name)
11345

    
11346
    return (list(all_split_instances - previously_split_instances),
11347
            list(previously_split_instances & all_split_instances))
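    # Hypothetical example: moving node "n2" to group "g2" while its DRBD
    # peer "n1" stays in "g1" puts the affected instance in the first list
    # ("newly split"); instances that already spanned groups and still do
    # after the change end up in the second list.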
11348

    
11349

    
11350
class _GroupQuery(_QueryBase):
11351
  FIELDS = query.GROUP_FIELDS
11352

    
11353
  def ExpandNames(self, lu):
11354
    lu.needed_locks = {}
11355

    
11356
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11357
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11358

    
11359
    if not self.names:
11360
      self.wanted = [name_to_uuid[name]
11361
                     for name in utils.NiceSort(name_to_uuid.keys())]
11362
    else:
11363
      # Accept names to be either names or UUIDs.
11364
      missing = []
11365
      self.wanted = []
11366
      all_uuid = frozenset(self._all_groups.keys())
11367

    
11368
      for name in self.names:
11369
        if name in all_uuid:
11370
          self.wanted.append(name)
11371
        elif name in name_to_uuid:
11372
          self.wanted.append(name_to_uuid[name])
11373
        else:
11374
          missing.append(name)
11375

    
11376
      if missing:
11377
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11378
                                   utils.CommaJoin(missing),
11379
                                   errors.ECODE_NOENT)
11380

    
11381
  def DeclareLocks(self, lu, level):
11382
    pass
11383

    
11384
  def _GetQueryData(self, lu):
11385
    """Computes the list of node groups and their attributes.
11386

11387
    """
11388
    do_nodes = query.GQ_NODE in self.requested_data
11389
    do_instances = query.GQ_INST in self.requested_data
11390

    
11391
    group_to_nodes = None
11392
    group_to_instances = None
11393

    
11394
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11395
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11396
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11397
    # instance->node. Hence, we will need to process nodes even if we only need
11398
    # instance information.
11399
    if do_nodes or do_instances:
11400
      all_nodes = lu.cfg.GetAllNodesInfo()
11401
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11402
      node_to_group = {}
11403

    
11404
      for node in all_nodes.values():
11405
        if node.group in group_to_nodes:
11406
          group_to_nodes[node.group].append(node.name)
11407
          node_to_group[node.name] = node.group
11408

    
11409
      if do_instances:
11410
        all_instances = lu.cfg.GetAllInstancesInfo()
11411
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11412

    
11413
        for instance in all_instances.values():
11414
          node = instance.primary_node
11415
          if node in node_to_group:
11416
            group_to_instances[node_to_group[node]].append(instance.name)
11417

    
11418
        if not do_nodes:
11419
          # Do not pass on node information if it was not requested.
11420
          group_to_nodes = None
11421

    
11422
    return query.GroupQueryData([self._all_groups[uuid]
11423
                                 for uuid in self.wanted],
11424
                                group_to_nodes, group_to_instances)
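    # The optional mappings have the shape {group_uuid: [node names]} and
    # {group_uuid: [instance names]}; each is None when the corresponding
    # fields were not requested.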
11425

    
11426

    
11427
class LUGroupQuery(NoHooksLU):
11428
  """Logical unit for querying node groups.
11429

11430
  """
11431
  REQ_BGL = False
11432

    
11433
  def CheckArguments(self):
11434
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11435
                          self.op.output_fields, False)
11436

    
11437
  def ExpandNames(self):
11438
    self.gq.ExpandNames(self)
11439

    
11440
  def Exec(self, feedback_fn):
11441
    return self.gq.OldStyleQuery(self)
11442

    
11443

    
11444
class LUGroupSetParams(LogicalUnit):
11445
  """Modifies the parameters of a node group.
11446

11447
  """
11448
  HPATH = "group-modify"
11449
  HTYPE = constants.HTYPE_GROUP
11450
  REQ_BGL = False
11451

    
11452
  def CheckArguments(self):
11453
    all_changes = [
11454
      self.op.ndparams,
11455
      self.op.alloc_policy,
11456
      ]
11457

    
11458
    if all_changes.count(None) == len(all_changes):
11459
      raise errors.OpPrereqError("Please pass at least one modification",
11460
                                 errors.ECODE_INVAL)
11461

    
11462
  def ExpandNames(self):
11463
    # This raises errors.OpPrereqError on its own:
11464
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11465

    
11466
    self.needed_locks = {
11467
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11468
      }
11469

    
11470
  def CheckPrereq(self):
11471
    """Check prerequisites.
11472

11473
    """
11474
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11475

    
11476
    if self.group is None:
11477
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11478
                               (self.op.group_name, self.group_uuid))
11479

    
11480
    if self.op.ndparams:
11481
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11482
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11483
      self.new_ndparams = new_ndparams
11484

    
11485
  def BuildHooksEnv(self):
11486
    """Build hooks env.
11487

11488
    """
11489
    return {
11490
      "GROUP_NAME": self.op.group_name,
11491
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11492
      }
11493

    
11494
  def BuildHooksNodes(self):
11495
    """Build hooks nodes.
11496

11497
    """
11498
    mn = self.cfg.GetMasterNode()
11499
    return ([mn], [mn])
11500

    
11501
  def Exec(self, feedback_fn):
11502
    """Modifies the node group.
11503

11504
    """
11505
    result = []
11506

    
11507
    if self.op.ndparams:
11508
      self.group.ndparams = self.new_ndparams
11509
      result.append(("ndparams", str(self.group.ndparams)))
11510

    
11511
    if self.op.alloc_policy:
11512
      self.group.alloc_policy = self.op.alloc_policy
11513

    
11514
    self.cfg.Update(self.group, feedback_fn)
11515
    return result
11516

    
11517

    
11518

    
11519
class LUGroupRemove(LogicalUnit):
11520
  HPATH = "group-remove"
11521
  HTYPE = constants.HTYPE_GROUP
11522
  REQ_BGL = False
11523

    
11524
  def ExpandNames(self):
11525
    # This will raise errors.OpPrereqError on its own:
11526
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11527
    self.needed_locks = {
11528
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11529
      }
11530

    
11531
  def CheckPrereq(self):
11532
    """Check prerequisites.
11533

11534
    This checks that the given group name exists as a node group, that it is
11535
    empty (i.e., contains no nodes), and that it is not the last group of the
11536
    cluster.
11537

11538
    """
11539
    # Verify that the group is empty.
11540
    group_nodes = [node.name
11541
                   for node in self.cfg.GetAllNodesInfo().values()
11542
                   if node.group == self.group_uuid]
11543

    
11544
    if group_nodes:
11545
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11546
                                 " nodes: %s" %
11547
                                 (self.op.group_name,
11548
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11549
                                 errors.ECODE_STATE)
11550

    
11551
    # Verify the cluster would not be left group-less.
11552
    if len(self.cfg.GetNodeGroupList()) == 1:
11553
      raise errors.OpPrereqError("Group '%s' is the only group,"
11554
                                 " cannot be removed" %
11555
                                 self.op.group_name,
11556
                                 errors.ECODE_STATE)
11557

    
11558
  def BuildHooksEnv(self):
11559
    """Build hooks env.
11560

11561
    """
11562
    return {
11563
      "GROUP_NAME": self.op.group_name,
11564
      }
11565

    
11566
  def BuildHooksNodes(self):
11567
    """Build hooks nodes.
11568

11569
    """
11570
    mn = self.cfg.GetMasterNode()
11571
    return ([mn], [mn])
11572

    
11573
  def Exec(self, feedback_fn):
11574
    """Remove the node group.
11575

11576
    """
11577
    try:
11578
      self.cfg.RemoveNodeGroup(self.group_uuid)
11579
    except errors.ConfigurationError:
11580
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11581
                               (self.op.group_name, self.group_uuid))
11582

    
11583
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11584

    
11585

    
11586
class LUGroupRename(LogicalUnit):
11587
  HPATH = "group-rename"
11588
  HTYPE = constants.HTYPE_GROUP
11589
  REQ_BGL = False
11590

    
11591
  def ExpandNames(self):
11592
    # This raises errors.OpPrereqError on its own:
11593
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11594

    
11595
    self.needed_locks = {
11596
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11597
      }
11598

    
11599
  def CheckPrereq(self):
11600
    """Check prerequisites.
11601

11602
    Ensures requested new name is not yet used.
11603

11604
    """
11605
    try:
11606
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11607
    except errors.OpPrereqError:
11608
      pass
11609
    else:
11610
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11611
                                 " node group (UUID: %s)" %
11612
                                 (self.op.new_name, new_name_uuid),
11613
                                 errors.ECODE_EXISTS)
11614

    
11615
  def BuildHooksEnv(self):
11616
    """Build hooks env.
11617

11618
    """
11619
    return {
11620
      "OLD_NAME": self.op.group_name,
11621
      "NEW_NAME": self.op.new_name,
11622
      }
11623

    
11624
  def BuildHooksNodes(self):
11625
    """Build hooks nodes.
11626

11627
    """
11628
    mn = self.cfg.GetMasterNode()
11629

    
11630
    all_nodes = self.cfg.GetAllNodesInfo()
11631
    all_nodes.pop(mn, None)
11632

    
11633
    run_nodes = [mn]
11634
    run_nodes.extend(node.name for node in all_nodes.values()
11635
                     if node.group == self.group_uuid)
11636

    
11637
    return (run_nodes, run_nodes)
11638

    
11639
  def Exec(self, feedback_fn):
11640
    """Rename the node group.
11641

11642
    """
11643
    group = self.cfg.GetNodeGroup(self.group_uuid)
11644

    
11645
    if group is None:
11646
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11647
                               (self.op.group_name, self.group_uuid))
11648

    
11649
    group.name = self.op.new_name
11650
    self.cfg.Update(group, feedback_fn)
11651

    
11652
    return self.op.new_name
11653

    
11654

    
11655
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11656
  """Generic tags LU.
11657

11658
  This is an abstract class which is the parent of all the other tags LUs.
11659

11660
  """
11661
  def ExpandNames(self):
11662
    self.group_uuid = None
11663
    self.needed_locks = {}
11664
    if self.op.kind == constants.TAG_NODE:
11665
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11666
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
11667
    elif self.op.kind == constants.TAG_INSTANCE:
11668
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11669
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11670
    elif self.op.kind == constants.TAG_NODEGROUP:
11671
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11672

    
11673
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11674
    # not possible to acquire the BGL based on opcode parameters)
11675

    
11676
  def CheckPrereq(self):
11677
    """Check prerequisites.
11678

11679
    """
11680
    if self.op.kind == constants.TAG_CLUSTER:
11681
      self.target = self.cfg.GetClusterInfo()
11682
    elif self.op.kind == constants.TAG_NODE:
11683
      self.target = self.cfg.GetNodeInfo(self.op.name)
11684
    elif self.op.kind == constants.TAG_INSTANCE:
11685
      self.target = self.cfg.GetInstanceInfo(self.op.name)
11686
    elif self.op.kind == constants.TAG_NODEGROUP:
11687
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
11688
    else:
11689
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11690
                                 str(self.op.kind), errors.ECODE_INVAL)
11691

    
11692

    
11693
class LUTagsGet(TagsLU):
11694
  """Returns the tags of a given object.
11695

11696
  """
11697
  REQ_BGL = False
11698

    
11699
  def ExpandNames(self):
11700
    TagsLU.ExpandNames(self)
11701

    
11702
    # Share locks as this is only a read operation
11703
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11704

    
11705
  def Exec(self, feedback_fn):
11706
    """Returns the tag list.
11707

11708
    """
11709
    return list(self.target.GetTags())
11710

    
11711

    
11712
class LUTagsSearch(NoHooksLU):
11713
  """Searches the tags for a given pattern.
11714

11715
  """
11716
  REQ_BGL = False
11717

    
11718
  def ExpandNames(self):
11719
    self.needed_locks = {}
11720

    
11721
  def CheckPrereq(self):
11722
    """Check prerequisites.
11723

11724
    This checks the pattern passed for validity by compiling it.
11725

11726
    """
11727
    try:
11728
      self.re = re.compile(self.op.pattern)
11729
    except re.error, err:
11730
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11731
                                 (self.op.pattern, err), errors.ECODE_INVAL)
11732

    
11733
  def Exec(self, feedback_fn):
11734
    """Returns the tag list.
11735

11736
    """
11737
    cfg = self.cfg
11738
    tgts = [("/cluster", cfg.GetClusterInfo())]
11739
    ilist = cfg.GetAllInstancesInfo().values()
11740
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11741
    nlist = cfg.GetAllNodesInfo().values()
11742
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11743
    tgts.extend(("/nodegroup/%s" % n.name, n)
11744
                for n in cfg.GetAllNodeGroupsInfo().values())
11745
    results = []
11746
    for path, target in tgts:
11747
      for tag in target.GetTags():
11748
        if self.re.search(tag):
11749
          results.append((path, tag))
11750
    return results
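    # Each entry is a (path, tag) pair; an illustrative result (made-up
    # values) would be [("/instances/inst1.example.com", "staging")].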
11751

    
11752

    
11753
class LUTagsSet(TagsLU):
11754
  """Sets a tag on a given object.
11755

11756
  """
11757
  REQ_BGL = False
11758

    
11759
  def CheckPrereq(self):
11760
    """Check prerequisites.
11761

11762
    This checks the type and length of the tag name and value.
11763

11764
    """
11765
    TagsLU.CheckPrereq(self)
11766
    for tag in self.op.tags:
11767
      objects.TaggableObject.ValidateTag(tag)
11768

    
11769
  def Exec(self, feedback_fn):
11770
    """Sets the tag.
11771

11772
    """
11773
    try:
11774
      for tag in self.op.tags:
11775
        self.target.AddTag(tag)
11776
    except errors.TagError, err:
11777
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
11778
    self.cfg.Update(self.target, feedback_fn)
11779

    
11780

    
11781
class LUTagsDel(TagsLU):
11782
  """Delete a list of tags from a given object.
11783

11784
  """
11785
  REQ_BGL = False
11786

    
11787
  def CheckPrereq(self):
11788
    """Check prerequisites.
11789

11790
    This checks that we have the given tag.
11791

11792
    """
11793
    TagsLU.CheckPrereq(self)
11794
    for tag in self.op.tags:
11795
      objects.TaggableObject.ValidateTag(tag)
11796
    del_tags = frozenset(self.op.tags)
11797
    cur_tags = self.target.GetTags()
11798

    
11799
    diff_tags = del_tags - cur_tags
11800
    if diff_tags:
11801
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11802
      raise errors.OpPrereqError("Tag(s) %s not found" %
11803
                                 (utils.CommaJoin(diff_names), ),
11804
                                 errors.ECODE_NOENT)
11805

    
11806
  def Exec(self, feedback_fn):
11807
    """Remove the tag from the object.
11808

11809
    """
11810
    for tag in self.op.tags:
11811
      self.target.RemoveTag(tag)
11812
    self.cfg.Update(self.target, feedback_fn)
11813

    
11814

    
11815
class LUTestDelay(NoHooksLU):
11816
  """Sleep for a specified amount of time.
11817

11818
  This LU sleeps on the master and/or nodes for a specified amount of
11819
  time.
11820

11821
  """
11822
  REQ_BGL = False
11823

    
11824
  def ExpandNames(self):
11825
    """Expand names and set required locks.
11826

11827
    This expands the node list, if any.
11828

11829
    """
11830
    self.needed_locks = {}
11831
    if self.op.on_nodes:
11832
      # _GetWantedNodes can be used here, but is not always appropriate to use
11833
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11834
      # more information.
11835
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11836
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11837

    
11838
  def _TestDelay(self):
11839
    """Do the actual sleep.
11840

11841
    """
11842
    if self.op.on_master:
11843
      if not utils.TestDelay(self.op.duration):
11844
        raise errors.OpExecError("Error during master delay test")
11845
    if self.op.on_nodes:
11846
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11847
      for node, node_result in result.items():
11848
        node_result.Raise("Failure during rpc call to node %s" % node)
11849

    
11850
  def Exec(self, feedback_fn):
11851
    """Execute the test delay opcode, with the wanted repetitions.
11852

11853
    """
11854
    if self.op.repeat == 0:
11855
      self._TestDelay()
11856
    else:
11857
      top_value = self.op.repeat - 1
11858
      for i in range(self.op.repeat):
11859
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11860
        self._TestDelay()
11861

    
11862

    
11863
class LUTestJqueue(NoHooksLU):
11864
  """Utility LU to test some aspects of the job queue.
11865

11866
  """
11867
  REQ_BGL = False
11868

    
11869
  # Must be lower than default timeout for WaitForJobChange to see whether it
11870
  # notices changed jobs
11871
  _CLIENT_CONNECT_TIMEOUT = 20.0
11872
  _CLIENT_CONFIRM_TIMEOUT = 60.0
11873

    
11874
  @classmethod
11875
  def _NotifyUsingSocket(cls, cb, errcls):
11876
    """Opens a Unix socket and waits for another program to connect.
11877

11878
    @type cb: callable
11879
    @param cb: Callback to send socket name to client
11880
    @type errcls: class
11881
    @param errcls: Exception class to use for errors
11882

11883
    """
11884
    # Using a temporary directory as there's no easy way to create temporary
11885
    # sockets without writing a custom loop around tempfile.mktemp and
11886
    # socket.bind
11887
    tmpdir = tempfile.mkdtemp()
11888
    try:
11889
      tmpsock = utils.PathJoin(tmpdir, "sock")
11890

    
11891
      logging.debug("Creating temporary socket at %s", tmpsock)
11892
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11893
      try:
11894
        sock.bind(tmpsock)
11895
        sock.listen(1)
11896

    
11897
        # Send details to client
11898
        cb(tmpsock)
11899

    
11900
        # Wait for client to connect before continuing
11901
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11902
        try:
11903
          (conn, _) = sock.accept()
11904
        except socket.error, err:
11905
          raise errcls("Client didn't connect in time (%s)" % err)
11906
      finally:
11907
        sock.close()
11908
    finally:
11909
      # Remove as soon as client is connected
11910
      shutil.rmtree(tmpdir)
11911

    
11912
    # Wait for client to close
11913
    try:
11914
      try:
11915
        # pylint: disable-msg=E1101
11916
        # Instance of '_socketobject' has no ... member
11917
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11918
        conn.recv(1)
11919
      except socket.error, err:
11920
        raise errcls("Client failed to confirm notification (%s)" % err)
11921
    finally:
11922
      conn.close()
11923

    
11924
  def _SendNotification(self, test, arg, sockname):
11925
    """Sends a notification to the client.
11926

11927
    @type test: string
11928
    @param test: Test name
11929
    @param arg: Test argument (depends on test)
11930
    @type sockname: string
11931
    @param sockname: Socket path
11932

11933
    """
11934
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11935

    
11936
  def _Notify(self, prereq, test, arg):
11937
    """Notifies the client of a test.
11938

11939
    @type prereq: bool
11940
    @param prereq: Whether this is a prereq-phase test
11941
    @type test: string
11942
    @param test: Test name
11943
    @param arg: Test argument (depends on test)
11944

11945
    """
11946
    if prereq:
11947
      errcls = errors.OpPrereqError
11948
    else:
11949
      errcls = errors.OpExecError
11950

    
11951
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11952
                                                  test, arg),
11953
                                   errcls)
11954

    
11955
  def CheckArguments(self):
11956
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11957
    self.expandnames_calls = 0
11958

    
11959
  def ExpandNames(self):
11960
    checkargs_calls = getattr(self, "checkargs_calls", 0)
11961
    if checkargs_calls < 1:
11962
      raise errors.ProgrammerError("CheckArguments was not called")
11963

    
11964
    self.expandnames_calls += 1
11965

    
11966
    if self.op.notify_waitlock:
11967
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
11968

    
11969
    self.LogInfo("Expanding names")
11970

    
11971
    # Get lock on master node (just to get a lock, not for a particular reason)
11972
    self.needed_locks = {
11973
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11974
      }
11975

    
11976
  def Exec(self, feedback_fn):
11977
    if self.expandnames_calls < 1:
11978
      raise errors.ProgrammerError("ExpandNames was not called")
11979

    
11980
    if self.op.notify_exec:
11981
      self._Notify(False, constants.JQT_EXEC, None)
11982

    
11983
    self.LogInfo("Executing")
11984

    
11985
    if self.op.log_messages:
11986
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11987
      for idx, msg in enumerate(self.op.log_messages):
11988
        self.LogInfo("Sending log message %s", idx + 1)
11989
        feedback_fn(constants.JQT_MSGPREFIX + msg)
11990
        # Report how many test messages have been sent
11991
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11992

    
11993
    if self.op.fail:
11994
      raise errors.OpExecError("Opcode failure was requested")
11995

    
11996
    return True
11997

    
11998

    
11999
class IAllocator(object):
12000
  """IAllocator framework.
12001

12002
  An IAllocator instance has four sets of attributes:
12003
    - cfg that is needed to query the cluster
12004
    - input data (all members required by the selected mode in _MODE_DATA)
12005
    - four buffer attributes (in|out_data|text), that represent the
12006
      input (to the external script) in text and data structure format,
12007
      and the output from it, again in two formats
12008
    - the result variables from the script (success, info, result) for
12009
      easy usage
12010

12011
  """
12012
  # pylint: disable-msg=R0902
12013
  # lots of instance attributes
12014

    
12015
  def __init__(self, cfg, rpc, mode, **kwargs):
12016
    self.cfg = cfg
12017
    self.rpc = rpc
12018
    # init buffer variables
12019
    self.in_text = self.out_text = self.in_data = self.out_data = None
12020
    # init all input fields so that pylint is happy
12021
    self.mode = mode
12022
    self.memory = self.disks = self.disk_template = None
12023
    self.os = self.tags = self.nics = self.vcpus = None
12024
    self.hypervisor = None
12025
    self.relocate_from = None
12026
    self.name = None
12027
    self.evac_nodes = None
12028
    self.instances = None
12029
    self.reloc_mode = None
12030
    self.target_groups = []
12031
    # computed fields
12032
    self.required_nodes = None
12033
    # init result fields
12034
    self.success = self.info = self.result = None
12035

    
12036
    try:
12037
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12038
    except KeyError:
12039
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12040
                                   " IAllocator" % self.mode)
12041

    
12042
    keyset = [n for (n, _) in keydata]
12043

    
12044
    for key in kwargs:
12045
      if key not in keyset:
12046
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12047
                                     " IAllocator" % key)
12048
      setattr(self, key, kwargs[key])
12049

    
12050
    for key in keyset:
12051
      if key not in kwargs:
12052
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12053
                                     " IAllocator" % key)
12054
    self._BuildInputData(compat.partial(fn, self), keydata)
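    # The two loops above make sure that exactly the keyword arguments
    # required by the selected mode (as listed in _MODE_DATA) were passed in,
    # no more and no less, before any cluster data is gathered.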
12055

    
12056
  def _ComputeClusterData(self):
12057
    """Compute the generic allocator input data.
12058

12059
    This is the data that is independent of the actual operation.
12060

12061
    """
12062
    cfg = self.cfg
12063
    cluster_info = cfg.GetClusterInfo()
12064
    # cluster data
12065
    data = {
12066
      "version": constants.IALLOCATOR_VERSION,
12067
      "cluster_name": cfg.GetClusterName(),
12068
      "cluster_tags": list(cluster_info.GetTags()),
12069
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12070
      # we don't have job IDs
12071
      }
12072
    ninfo = cfg.GetAllNodesInfo()
12073
    iinfo = cfg.GetAllInstancesInfo().values()
12074
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12075

    
12076
    # node data
12077
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
12078

    
12079
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12080
      hypervisor_name = self.hypervisor
12081
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12082
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12083
    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
12084
                       constants.IALLOCATOR_MODE_MRELOC):
12085
      hypervisor_name = cluster_info.enabled_hypervisors[0]
12086

    
12087
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12088
                                        hypervisor_name)
12089
    node_iinfo = \
12090
      self.rpc.call_all_instances_info(node_list,
12091
                                       cluster_info.enabled_hypervisors)
12092

    
12093
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12094

    
12095
    config_ndata = self._ComputeBasicNodeData(ninfo)
12096
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12097
                                                 i_list, config_ndata)
12098
    assert len(data["nodes"]) == len(ninfo), \
12099
        "Incomplete node data computed"
12100

    
12101
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12102

    
12103
    self.in_data = data
12104

    
12105
  @staticmethod
12106
  def _ComputeNodeGroupData(cfg):
12107
    """Compute node groups data.
12108

12109
    """
12110
    ng = dict((guuid, {
12111
      "name": gdata.name,
12112
      "alloc_policy": gdata.alloc_policy,
12113
      })
12114
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12115

    
12116
    return ng
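    # Resulting shape (group name and policy are illustrative):
    #   {"<group-uuid>": {"name": "default", "alloc_policy": "preferred"}}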
12117

    
12118
  @staticmethod
12119
  def _ComputeBasicNodeData(node_cfg):
12120
    """Compute global node data.
12121

12122
    @rtype: dict
12123
    @return: a dict of node name: node data dict
12124

12125
    """
12126
    # fill in static (config-based) values
12127
    node_results = dict((ninfo.name, {
12128
      "tags": list(ninfo.GetTags()),
12129
      "primary_ip": ninfo.primary_ip,
12130
      "secondary_ip": ninfo.secondary_ip,
12131
      "offline": ninfo.offline,
12132
      "drained": ninfo.drained,
12133
      "master_candidate": ninfo.master_candidate,
12134
      "group": ninfo.group,
12135
      "master_capable": ninfo.master_capable,
12136
      "vm_capable": ninfo.vm_capable,
12137
      })
12138
      for ninfo in node_cfg.values())
12139

    
12140
    return node_results
12141

    
12142
  @staticmethod
12143
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12144
                              node_results):
12145
    """Compute global node data.
12146

12147
    @param node_results: the basic node structures as filled from the config
12148

12149
    """
12150
    # make a copy of the current dict
12151
    node_results = dict(node_results)
12152
    for nname, nresult in node_data.items():
12153
      assert nname in node_results, "Missing basic data for node %s" % nname
12154
      ninfo = node_cfg[nname]
12155

    
12156
      if not (ninfo.offline or ninfo.drained):
12157
        nresult.Raise("Can't get data for node %s" % nname)
12158
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12159
                                nname)
12160
        remote_info = nresult.payload
12161

    
12162
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
12163
                     'vg_size', 'vg_free', 'cpu_total']:
12164
          if attr not in remote_info:
12165
            raise errors.OpExecError("Node '%s' didn't return attribute"
12166
                                     " '%s'" % (nname, attr))
12167
          if not isinstance(remote_info[attr], int):
12168
            raise errors.OpExecError("Node '%s' returned invalid value"
12169
                                     " for '%s': %s" %
12170
                                     (nname, attr, remote_info[attr]))
12171
        # compute memory used by primary instances
12172
        i_p_mem = i_p_up_mem = 0
12173
        for iinfo, beinfo in i_list:
12174
          if iinfo.primary_node == nname:
12175
            i_p_mem += beinfo[constants.BE_MEMORY]
12176
            if iinfo.name not in node_iinfo[nname].payload:
12177
              i_used_mem = 0
12178
            else:
12179
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12180
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12181
            remote_info['memory_free'] -= max(0, i_mem_diff)
12182

    
12183
            if iinfo.admin_up:
12184
              i_p_up_mem += beinfo[constants.BE_MEMORY]
12185

    
12186
        # compute memory used by instances
12187
        pnr_dyn = {
12188
          "total_memory": remote_info['memory_total'],
12189
          "reserved_memory": remote_info['memory_dom0'],
12190
          "free_memory": remote_info['memory_free'],
12191
          "total_disk": remote_info['vg_size'],
12192
          "free_disk": remote_info['vg_free'],
12193
          "total_cpus": remote_info['cpu_total'],
12194
          "i_pri_memory": i_p_mem,
12195
          "i_pri_up_memory": i_p_up_mem,
12196
          }
12197
        pnr_dyn.update(node_results[nname])
12198
        node_results[nname] = pnr_dyn
12199

    
12200
    return node_results
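    # Online nodes now carry both the static configuration fields and the
    # dynamic ones (total/free memory and disk, CPU count and the memory used
    # by primary instances); offline or drained nodes keep only the static
    # data computed by _ComputeBasicNodeData.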
12201

    
12202
  @staticmethod
12203
  def _ComputeInstanceData(cluster_info, i_list):
12204
    """Compute global instance data.
12205

12206
    """
12207
    instance_data = {}
12208
    for iinfo, beinfo in i_list:
12209
      nic_data = []
12210
      for nic in iinfo.nics:
12211
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12212
        nic_dict = {
12213
          "mac": nic.mac,
12214
          "ip": nic.ip,
12215
          "mode": filled_params[constants.NIC_MODE],
12216
          "link": filled_params[constants.NIC_LINK],
12217
          }
12218
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12219
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12220
        nic_data.append(nic_dict)
12221
      pir = {
12222
        "tags": list(iinfo.GetTags()),
12223
        "admin_up": iinfo.admin_up,
12224
        "vcpus": beinfo[constants.BE_VCPUS],
12225
        "memory": beinfo[constants.BE_MEMORY],
12226
        "os": iinfo.os,
12227
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12228
        "nics": nic_data,
12229
        "disks": [{constants.IDISK_SIZE: dsk.size,
12230
                   constants.IDISK_MODE: dsk.mode}
12231
                  for dsk in iinfo.disks],
12232
        "disk_template": iinfo.disk_template,
12233
        "hypervisor": iinfo.hypervisor,
12234
        }
12235
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12236
                                                 pir["disks"])
12237
      instance_data[iinfo.name] = pir
12238

    
12239
    return instance_data
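    # Maps each instance name to a dict with its tags, run state, BE
    # parameters (memory/VCPUs), OS, nodes, NICs, disks and the total disk
    # space its template requires, as consumed by the allocator.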
12240

    
12241
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

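    # disk templates with internal mirroring need a primary and a secondary
    # node; everything else fits on a single node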
    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddMultiRelocate(self):
    """Get data for multi-relocate requests.

    """
    return {
      "instances": self.instances,
      "reloc_mode": self.reloc_mode,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
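    # keydata is a list of (name, validator) pairs; the request built by fn()
    # must contain every listed key and each value must pass its validator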
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
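  # Maps each allocator mode to (request-building method, list of
  # (key, validator) pairs the request must contain, validator for the
  # allocator's result)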
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_MRELOC:
      (_AddMultiRelocate, [
        ("instances", _STRING_LIST),
        ("reloc_mode", ht.TElemOf(constants.IALLOCATOR_MRELOC_MODES)),
        ("target_groups", _STRING_LIST),
        ],
       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
         # pylint: disable-msg=E1101
         # Class '...' has no 'OP_ID' member
         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                              opcodes.OpInstanceMigrate.OP_ID,
                              opcodes.OpInstanceReplaceDisks.OP_ID])
         })))),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

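    # "success", "info" and "result" are the mandatory top-level keys of an
    # iallocator response; copy them onto self for easy access by callers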
    for key in "success", "info", "result":
12422
      if key not in rdict:
12423
        raise errors.OpExecError("Can't parse iallocator results:"
12424
                                 " missing key '%s'" % key)
12425
      setattr(self, key, rdict[key])
12426

    
12427
    if not self._result_check(self.result):
12428
      raise errors.OpExecError("Iallocator returned invalid result,"
12429
                               " expected %s, got %s" %
12430
                               (self._result_check, self.result),
12431
                               errors.ECODE_INVAL)
12432

    
12433
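    # for relocation and evacuation requests, the nodes proposed by the
    # allocator must stay within the node group(s) of the original nodes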
    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      if self.op.instances:
        self.op.instances = _GetWantedInstances(self, self.op.instances)
      else:
        raise errors.OpPrereqError("Missing instances to relocate",
                                   errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       reloc_mode=self.op.reloc_mode,
                       target_groups=self.op.target_groups)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

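# each query resource reachable via an opcode must have an implementation
# registered in _QUERY_IMPL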
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)