
root / lib / cmdlib.py @ 0f8810df


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have way too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60
from ganeti import ht
61

    
62
import ganeti.masterd.instance # pylint: disable-msg=W0611
63

    
64

    
65
def _SupportsOob(cfg, node):
66
  """Tells if node supports OOB.
67

68
  @type cfg: L{config.ConfigWriter}
69
  @param cfg: The cluster configuration
70
  @type node: L{objects.Node}
71
  @param node: The node
72
  @return: The OOB script if supported or an empty string otherwise
73

74
  """
75
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
76

    
77

    
78
class ResultWithJobs:
79
  """Data container for LU results with jobs.
80

81
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83
  contained in the C{jobs} attribute and include the job IDs in the opcode
84
  result.
85

86
  """
87
  def __init__(self, jobs, **kwargs):
88
    """Initializes this class.
89

90
    Additional return values can be specified as keyword arguments.
91

92
    @type jobs: list of lists of L{opcode.OpCode}
93
    @param jobs: A list of lists of opcode objects
94

95
    """
96
    self.jobs = jobs
97
    self.other = kwargs
98
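# Illustrative sketch (not part of the original module): an LU's Exec method
# can hand follow-up work back to the job queue by returning ResultWithJobs.
# The opcode used here (opcodes.OpTestDelay) is only an example; any list of
# lists of opcode objects would do, one inner list per job:
#
#   def Exec(self, feedback_fn):
#     # ... do the immediate work ...
#     return ResultWithJobs([[opcodes.OpTestDelay(duration=1.0)]],
#                           summary="follow-up job submitted")
#
# mcpu.Processor._ProcessResult then submits each inner list as a job and adds
# the resulting job IDs to this opcode's result; the extra keyword argument
# ("summary") ends up in the C{other} dictionary.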

    
99

    
100
class LogicalUnit(object):
101
  """Logical Unit base class.
102

103
  Subclasses must follow these rules:
104
    - implement ExpandNames
105
    - implement CheckPrereq (except when tasklets are used)
106
    - implement Exec (except when tasklets are used)
107
    - implement BuildHooksEnv
108
    - implement BuildHooksNodes
109
    - redefine HPATH and HTYPE
110
    - optionally redefine their run requirements:
111
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112

113
  Note that all commands require root permissions.
114

115
  @ivar dry_run_result: the value (if any) that will be returned to the caller
116
      in dry-run mode (signalled by opcode dry_run parameter)
117

118
  """
119
  HPATH = None
120
  HTYPE = None
121
  REQ_BGL = True
122

    
123
  def __init__(self, processor, op, context, rpc):
124
    """Constructor for LogicalUnit.
125

126
    This needs to be overridden in derived classes in order to check op
127
    validity.
128

129
    """
130
    self.proc = processor
131
    self.op = op
132
    self.cfg = context.cfg
133
    self.glm = context.glm
134
    self.context = context
135
    self.rpc = rpc
136
    # Dicts used to declare locking needs to mcpu
137
    self.needed_locks = None
138
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
139
    self.add_locks = {}
140
    self.remove_locks = {}
141
    # Used to force good behavior when calling helper functions
142
    self.recalculate_locks = {}
143
    # logging
144
    self.Log = processor.Log # pylint: disable-msg=C0103
145
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
146
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
147
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
148
    # support for dry-run
149
    self.dry_run_result = None
150
    # support for generic debug attribute
151
    if (not hasattr(self.op, "debug_level") or
152
        not isinstance(self.op.debug_level, int)):
153
      self.op.debug_level = 0
154

    
155
    # Tasklets
156
    self.tasklets = None
157

    
158
    # Validate opcode parameters and set defaults
159
    self.op.Validate(True)
160

    
161
    self.CheckArguments()
162

    
163
  def CheckArguments(self):
164
    """Check syntactic validity for the opcode arguments.
165

166
    This method is for doing a simple syntactic check and ensuring the
167
    validity of opcode parameters, without any cluster-related
168
    checks. While the same can be accomplished in ExpandNames and/or
169
    CheckPrereq, doing these separately is better because:
170

171
      - ExpandNames is left as purely a lock-related function
172
      - CheckPrereq is run after we have acquired locks (and possibly
173
        waited for them)
174

175
    The function is allowed to change the self.op attribute so that
176
    later methods no longer need to worry about missing parameters.
177

178
    """
179
    pass
180

    
181
  def ExpandNames(self):
182
    """Expand names for this LU.
183

184
    This method is called before starting to execute the opcode, and it should
185
    update all the parameters of the opcode to their canonical form (e.g. a
186
    short node name must be fully expanded after this method has successfully
187
    completed). This way locking, hooks, logging, etc. can work correctly.
188

189
    LUs which implement this method must also populate the self.needed_locks
190
    member, as a dict with lock levels as keys, and a list of needed lock names
191
    as values. Rules:
192

193
      - use an empty dict if you don't need any lock
194
      - if you don't need any lock at a particular level omit that level
195
      - don't put anything for the BGL level
196
      - if you want all locks at a level use locking.ALL_SET as a value
197

198
    If you need to share locks (rather than acquire them exclusively) at one
199
    level you can modify self.share_locks, setting a true value (usually 1) for
200
    that level. By default locks are not shared.
201

202
    This function can also define a list of tasklets, which then will be
203
    executed in order instead of the usual LU-level CheckPrereq and Exec
204
    functions, if those are not defined by the LU.
205

206
    Examples::
207

208
      # Acquire all nodes and one instance
209
      self.needed_locks = {
210
        locking.LEVEL_NODE: locking.ALL_SET,
211
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
212
      }
213
      # Acquire just two nodes
214
      self.needed_locks = {
215
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
216
      }
217
      # Acquire no locks
218
      self.needed_locks = {} # No, you can't leave it to the default value None
219

220
    """
221
    # The implementation of this method is mandatory only if the new LU is
222
    # concurrent, so that old LUs don't need to be changed all at the same
223
    # time.
224
    if self.REQ_BGL:
225
      self.needed_locks = {} # Exclusive LUs don't need locks.
226
    else:
227
      raise NotImplementedError
228

    
229
  def DeclareLocks(self, level):
230
    """Declare LU locking needs for a level
231

232
    While most LUs can just declare their locking needs at ExpandNames time,
233
    sometimes there's the need to calculate some locks after having acquired
234
    the ones before. This function is called just before acquiring locks at a
235
    particular level, but after acquiring the ones at lower levels, and permits
236
    such calculations. It can be used to modify self.needed_locks, and by
237
    default it does nothing.
238

239
    This function is only called if you have something already set in
240
    self.needed_locks for the level.
241

242
    @param level: Locking level which is going to be locked
243
    @type level: member of ganeti.locking.LEVELS
244

245
    """
246

    
247
  def CheckPrereq(self):
248
    """Check prerequisites for this LU.
249

250
    This method should check that the prerequisites for the execution
251
    of this LU are fulfilled. It can do internode communication, but
252
    it should be idempotent - no cluster or system changes are
253
    allowed.
254

255
    The method should raise errors.OpPrereqError in case something is
256
    not fulfilled. Its return value is ignored.
257

258
    This method should also update all the parameters of the opcode to
259
    their canonical form if it hasn't been done by ExpandNames before.
260

261
    """
262
    if self.tasklets is not None:
263
      for (idx, tl) in enumerate(self.tasklets):
264
        logging.debug("Checking prerequisites for tasklet %s/%s",
265
                      idx + 1, len(self.tasklets))
266
        tl.CheckPrereq()
267
    else:
268
      pass
269

    
270
  def Exec(self, feedback_fn):
271
    """Execute the LU.
272

273
    This method should implement the actual work. It should raise
274
    errors.OpExecError for failures that are somewhat dealt with in
275
    code, or expected.
276

277
    """
278
    if self.tasklets is not None:
279
      for (idx, tl) in enumerate(self.tasklets):
280
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
281
        tl.Exec(feedback_fn)
282
    else:
283
      raise NotImplementedError
284

    
285
  def BuildHooksEnv(self):
286
    """Build hooks environment for this LU.
287

288
    @rtype: dict
289
    @return: Dictionary containing the environment that will be used for
290
      running the hooks for this LU. The keys of the dict must not be prefixed
291
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
292
      will extend the environment with additional variables. If no environment
293
      should be defined, an empty dictionary should be returned (not C{None}).
294
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
295
      will not be called.
296

297
    """
298
    raise NotImplementedError
299

    
300
  def BuildHooksNodes(self):
301
    """Build list of nodes to run LU's hooks.
302

303
    @rtype: tuple; (list, list)
304
    @return: Tuple containing a list of node names on which the hook
305
      should run before the execution and a list of node names on which the
306
      hook should run after the execution. No nodes should be returned as an
307
      empty list (and not None).
308
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
309
      will not be called.
310

311
    """
312
    raise NotImplementedError
313

    
314
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
315
    """Notify the LU about the results of its hooks.
316

317
    This method is called every time a hooks phase is executed, and notifies
318
    the Logical Unit about the hooks' result. The LU can then use it to alter
319
    its result based on the hooks.  By default the method does nothing and the
320
    previous result is passed back unchanged, but any LU can override it if it
321
    wants to use the local cluster hook-scripts somehow.
322

323
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
324
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
325
    @param hook_results: the results of the multi-node hooks rpc call
326
    @param feedback_fn: function used to send feedback back to the caller
327
    @param lu_result: the previous Exec result this LU had, or None
328
        in the PRE phase
329
    @return: the new Exec result, based on the previous result
330
        and hook results
331

332
    """
333
    # API must be kept, thus we ignore the unused-argument and
334
    # could-be-a-function warnings
335
    # pylint: disable-msg=W0613,R0201
336
    return lu_result
337

    
338
  def _ExpandAndLockInstance(self):
339
    """Helper function to expand and lock an instance.
340

341
    Many LUs that work on an instance take its name in self.op.instance_name
342
    and need to expand it and then declare the expanded name for locking. This
343
    function does it, and then updates self.op.instance_name to the expanded
344
    name. It also initializes needed_locks as a dict, if this hasn't been done
345
    before.
346

347
    """
348
    if self.needed_locks is None:
349
      self.needed_locks = {}
350
    else:
351
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
352
        "_ExpandAndLockInstance called with instance-level locks set"
353
    self.op.instance_name = _ExpandInstanceName(self.cfg,
354
                                                self.op.instance_name)
355
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
356

    
357
  def _LockInstancesNodes(self, primary_only=False):
358
    """Helper function to declare instances' nodes for locking.
359

360
    This function should be called after locking one or more instances to lock
361
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
362
    with all primary or secondary nodes for instances already locked and
363
    present in self.needed_locks[locking.LEVEL_INSTANCE].
364

365
    It should be called from DeclareLocks, and for safety only works if
366
    self.recalculate_locks[locking.LEVEL_NODE] is set.
367

368
    In the future it may grow parameters to lock just some instances' nodes, or
369
    to lock just primary or secondary nodes, if needed.
370

371
    It should be called from DeclareLocks in a way similar to::
372

373
      if level == locking.LEVEL_NODE:
374
        self._LockInstancesNodes()
375

376
    @type primary_only: boolean
377
    @param primary_only: only lock primary nodes of locked instances
378

379
    """
380
    assert locking.LEVEL_NODE in self.recalculate_locks, \
381
      "_LockInstancesNodes helper function called with no nodes to recalculate"
382

    
383
    # TODO: check that we have really been called with the instance locks held
384

    
385
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
386
    # future we might want to have different behaviors depending on the value
387
    # of self.recalculate_locks[locking.LEVEL_NODE]
388
    wanted_nodes = []
389
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
390
      instance = self.context.cfg.GetInstanceInfo(instance_name)
391
      wanted_nodes.append(instance.primary_node)
392
      if not primary_only:
393
        wanted_nodes.extend(instance.secondary_nodes)
394

    
395
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
396
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
397
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
398
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
399

    
400
    del self.recalculate_locks[locking.LEVEL_NODE]
401
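# Illustrative sketch (not part of the original module): a minimal LU
# following the rules listed in the LogicalUnit docstring above.  The opcode
# it would be bound to (carrying an "instance_name" field) and the hook path
# "instance-noop" are hypothetical.
class _LUExampleNoop(LogicalUnit):
  """No-op logical unit, shown only as an illustration.

  """
  HPATH = "instance-noop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    # Canonicalise self.op.instance_name and declare the instance lock
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    # Idempotent check only: the instance must still exist
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, "Instance vanished after locking"

  def BuildHooksEnv(self):
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return (nl, nl)

  def Exec(self, feedback_fn):
    feedback_fn("Doing nothing for instance %s" % self.instance.name)
    return self.instance.name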

    
402

    
403
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
404
  """Simple LU which runs no hooks.
405

406
  This LU is intended as a parent for other LogicalUnits which will
407
  run no hooks, in order to reduce duplicate code.
408

409
  """
410
  HPATH = None
411
  HTYPE = None
412

    
413
  def BuildHooksEnv(self):
414
    """Empty BuildHooksEnv for NoHooksLu.
415

416
    This just raises an error.
417

418
    """
419
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
420

    
421
  def BuildHooksNodes(self):
422
    """Empty BuildHooksNodes for NoHooksLU.
423

424
    """
425
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
426

    
427

    
428
class Tasklet:
429
  """Tasklet base class.
430

431
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
432
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
433
  tasklets know nothing about locks.
434

435
  Subclasses must follow these rules:
436
    - Implement CheckPrereq
437
    - Implement Exec
438

439
  """
440
  def __init__(self, lu):
441
    self.lu = lu
442

    
443
    # Shortcuts
444
    self.cfg = lu.cfg
445
    self.rpc = lu.rpc
446

    
447
  def CheckPrereq(self):
448
    """Check prerequisites for this tasklets.
449

450
    This method should check whether the prerequisites for the execution of
451
    this tasklet are fulfilled. It can do internode communication, but it
452
    should be idempotent - no cluster or system changes are allowed.
453

454
    The method should raise errors.OpPrereqError in case something is not
455
    fulfilled. Its return value is ignored.
456

457
    This method should also update all parameters to their canonical form if it
458
    hasn't been done before.
459

460
    """
461
    pass
462

    
463
  def Exec(self, feedback_fn):
464
    """Execute the tasklet.
465

466
    This method should implement the actual work. It should raise
467
    errors.OpExecError for failures that are somewhat dealt with in code, or
468
    expected.
469

470
    """
471
    raise NotImplementedError
472
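# Illustrative sketch (not part of the original module): a tasklet wrapping a
# single idempotent check.  The owning LU does the locking and would list this
# object in its "tasklets" attribute, e.g. in ExpandNames:
#   self.tasklets = [_ExampleNodeOnlineTasklet(self, node_name)]
class _ExampleNodeOnlineTasklet(Tasklet):
  """Verifies that a single node is online (illustration only).

  """
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    # Raises errors.OpPrereqError if the node is marked offline
    _CheckNodeOnline(self.lu, self.node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Node %s verified online" % self.node_name)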

    
473

    
474
class _QueryBase:
475
  """Base for query utility classes.
476

477
  """
478
  #: Attribute holding field definitions
479
  FIELDS = None
480

    
481
  def __init__(self, filter_, fields, use_locking):
482
    """Initializes this class.
483

484
    """
485
    self.use_locking = use_locking
486

    
487
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
488
                             namefield="name")
489
    self.requested_data = self.query.RequestedData()
490
    self.names = self.query.RequestedNames()
491

    
492
    # Sort only if no names were requested
493
    self.sort_by_name = not self.names
494

    
495
    self.do_locking = None
496
    self.wanted = None
497

    
498
  def _GetNames(self, lu, all_names, lock_level):
499
    """Helper function to determine names asked for in the query.
500

501
    """
502
    if self.do_locking:
503
      names = lu.glm.list_owned(lock_level)
504
    else:
505
      names = all_names
506

    
507
    if self.wanted == locking.ALL_SET:
508
      assert not self.names
509
      # caller didn't specify names, so ordering is not important
510
      return utils.NiceSort(names)
511

    
512
    # caller specified names and we must keep the same order
513
    assert self.names
514
    assert not self.do_locking or lu.glm.is_owned(lock_level)
515

    
516
    missing = set(self.wanted).difference(names)
517
    if missing:
518
      raise errors.OpExecError("Some items were removed before retrieving"
519
                               " their data: %s" % missing)
520

    
521
    # Return expanded names
522
    return self.wanted
523

    
524
  def ExpandNames(self, lu):
525
    """Expand names for this query.
526

527
    See L{LogicalUnit.ExpandNames}.
528

529
    """
530
    raise NotImplementedError()
531

    
532
  def DeclareLocks(self, lu, level):
533
    """Declare locks for this query.
534

535
    See L{LogicalUnit.DeclareLocks}.
536

537
    """
538
    raise NotImplementedError()
539

    
540
  def _GetQueryData(self, lu):
541
    """Collects all data for this query.
542

543
    @return: Query data object
544

545
    """
546
    raise NotImplementedError()
547

    
548
  def NewStyleQuery(self, lu):
549
    """Collect data and execute query.
550

551
    """
552
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
553
                                  sort_by_name=self.sort_by_name)
554

    
555
  def OldStyleQuery(self, lu):
556
    """Collect data and execute query.
557

558
    """
559
    return self.query.OldStyleQuery(self._GetQueryData(lu),
560
                                    sort_by_name=self.sort_by_name)
561

    
562

    
563
def _GetWantedNodes(lu, nodes):
564
  """Returns list of checked and expanded node names.
565

566
  @type lu: L{LogicalUnit}
567
  @param lu: the logical unit on whose behalf we execute
568
  @type nodes: list
569
  @param nodes: list of node names or None for all nodes
570
  @rtype: list
571
  @return: the list of nodes, sorted
572
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
573

574
  """
575
  if nodes:
576
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
577

    
578
  return utils.NiceSort(lu.cfg.GetNodeList())
579

    
580

    
581
def _GetWantedInstances(lu, instances):
582
  """Returns list of checked and expanded instance names.
583

584
  @type lu: L{LogicalUnit}
585
  @param lu: the logical unit on whose behalf we execute
586
  @type instances: list
587
  @param instances: list of instance names or None for all instances
588
  @rtype: list
589
  @return: the list of instances, sorted
590
  @raise errors.OpPrereqError: if the instances parameter is of the wrong type
591
  @raise errors.OpPrereqError: if any of the passed instances is not found
592

593
  """
594
  if instances:
595
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
596
  else:
597
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
598
  return wanted
599

    
600

    
601
def _GetUpdatedParams(old_params, update_dict,
602
                      use_default=True, use_none=False):
603
  """Return the new version of a parameter dictionary.
604

605
  @type old_params: dict
606
  @param old_params: old parameters
607
  @type update_dict: dict
608
  @param update_dict: dict containing new parameter values, or
609
      constants.VALUE_DEFAULT to reset the parameter to its default
610
      value
611
  @type use_default: boolean
612
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
613
      values as 'to be deleted' values
614
  @type use_none: boolean
615
  @param use_none: whether to recognise C{None} values as 'to be
616
      deleted' values
617
  @rtype: dict
618
  @return: the new parameter dictionary
619

620
  """
621
  params_copy = copy.deepcopy(old_params)
622
  for key, val in update_dict.iteritems():
623
    if ((use_default and val == constants.VALUE_DEFAULT) or
624
        (use_none and val is None)):
625
      try:
626
        del params_copy[key]
627
      except KeyError:
628
        pass
629
    else:
630
      params_copy[key] = val
631
  return params_copy
632
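# Worked example (illustrative, not part of the original module):
#
#   >>> _GetUpdatedParams({"vcpus": 2, "memory": 512},
#   ...                   {"memory": constants.VALUE_DEFAULT, "disk": 1})
#   {'vcpus': 2, 'disk': 1}
#
# "memory" is removed because VALUE_DEFAULT marks it for reset to the default,
# "disk" is added, and untouched keys are kept.  With use_none=True a C{None}
# value has the same deleting effect as VALUE_DEFAULT.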

    
633

    
634
def _ReleaseLocks(lu, level, names=None, keep=None):
635
  """Releases locks owned by an LU.
636

637
  @type lu: L{LogicalUnit}
638
  @param level: Lock level
639
  @type names: list or None
640
  @param names: Names of locks to release
641
  @type keep: list or None
642
  @param keep: Names of locks to retain
643

644
  """
645
  assert not (keep is not None and names is not None), \
646
         "Only one of the 'names' and the 'keep' parameters can be given"
647

    
648
  if names is not None:
649
    should_release = names.__contains__
650
  elif keep:
651
    should_release = lambda name: name not in keep
652
  else:
653
    should_release = None
654

    
655
  if should_release:
656
    retain = []
657
    release = []
658

    
659
    # Determine which locks to release
660
    for name in lu.glm.list_owned(level):
661
      if should_release(name):
662
        release.append(name)
663
      else:
664
        retain.append(name)
665

    
666
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
667

    
668
    # Release just some locks
669
    lu.glm.release(level, names=release)
670

    
671
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
672
  else:
673
    # Release everything
674
    lu.glm.release(level)
675

    
676
    assert not lu.glm.is_owned(level), "No locks should be owned"
677
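# Illustrative usage (not part of the original module): once an LU has
# narrowed its work down to a single node, it can drop all other node locks it
# still holds (the node name below is hypothetical):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=["node1.example.com"])
#
# Passing names=[...] instead releases exactly the listed locks; the two
# parameters are mutually exclusive, and with neither one everything at the
# level is released.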

    
678

    
679
def _RunPostHook(lu, node_name):
680
  """Runs the post-hook for an opcode on a single node.
681

682
  """
683
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
684
  try:
685
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
686
  except:
687
    # pylint: disable-msg=W0702
688
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
689

    
690

    
691
def _CheckOutputFields(static, dynamic, selected):
692
  """Checks whether all selected fields are valid.
693

694
  @type static: L{utils.FieldSet}
695
  @param static: static fields set
696
  @type dynamic: L{utils.FieldSet}
697
  @param dynamic: dynamic fields set
698

699
  """
700
  f = utils.FieldSet()
701
  f.Extend(static)
702
  f.Extend(dynamic)
703

    
704
  delta = f.NonMatching(selected)
705
  if delta:
706
    raise errors.OpPrereqError("Unknown output fields selected: %s"
707
                               % ",".join(delta), errors.ECODE_INVAL)
708

    
709

    
710
def _CheckGlobalHvParams(params):
711
  """Validates that given hypervisor params are not global ones.
712

713
  This will ensure that instances don't get customised versions of
714
  global params.
715

716
  """
717
  used_globals = constants.HVC_GLOBALS.intersection(params)
718
  if used_globals:
719
    msg = ("The following hypervisor parameters are global and cannot"
720
           " be customized at instance level, please modify them at"
721
           " cluster level: %s" % utils.CommaJoin(used_globals))
722
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
723

    
724

    
725
def _CheckNodeOnline(lu, node, msg=None):
726
  """Ensure that a given node is online.
727

728
  @param lu: the LU on behalf of which we make the check
729
  @param node: the node to check
730
  @param msg: if passed, should be a message to replace the default one
731
  @raise errors.OpPrereqError: if the node is offline
732

733
  """
734
  if msg is None:
735
    msg = "Can't use offline node"
736
  if lu.cfg.GetNodeInfo(node).offline:
737
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
738

    
739

    
740
def _CheckNodeNotDrained(lu, node):
741
  """Ensure that a given node is not drained.
742

743
  @param lu: the LU on behalf of which we make the check
744
  @param node: the node to check
745
  @raise errors.OpPrereqError: if the node is drained
746

747
  """
748
  if lu.cfg.GetNodeInfo(node).drained:
749
    raise errors.OpPrereqError("Can't use drained node %s" % node,
750
                               errors.ECODE_STATE)
751

    
752

    
753
def _CheckNodeVmCapable(lu, node):
754
  """Ensure that a given node is vm capable.
755

756
  @param lu: the LU on behalf of which we make the check
757
  @param node: the node to check
758
  @raise errors.OpPrereqError: if the node is not vm capable
759

760
  """
761
  if not lu.cfg.GetNodeInfo(node).vm_capable:
762
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
763
                               errors.ECODE_STATE)
764

    
765

    
766
def _CheckNodeHasOS(lu, node, os_name, force_variant):
767
  """Ensure that a node supports a given OS.
768

769
  @param lu: the LU on behalf of which we make the check
770
  @param node: the node to check
771
  @param os_name: the OS to query about
772
  @param force_variant: whether to ignore variant errors
773
  @raise errors.OpPrereqError: if the node is not supporting the OS
774

775
  """
776
  result = lu.rpc.call_os_get(node, os_name)
777
  result.Raise("OS '%s' not in supported OS list for node %s" %
778
               (os_name, node),
779
               prereq=True, ecode=errors.ECODE_INVAL)
780
  if not force_variant:
781
    _CheckOSVariant(result.payload, os_name)
782

    
783

    
784
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
785
  """Ensure that a node has the given secondary ip.
786

787
  @type lu: L{LogicalUnit}
788
  @param lu: the LU on behalf of which we make the check
789
  @type node: string
790
  @param node: the node to check
791
  @type secondary_ip: string
792
  @param secondary_ip: the ip to check
793
  @type prereq: boolean
794
  @param prereq: whether to throw a prerequisite or an execute error
795
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
796
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
797

798
  """
799
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
800
  result.Raise("Failure checking secondary ip on node %s" % node,
801
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
802
  if not result.payload:
803
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
804
           " please fix and re-run this command" % secondary_ip)
805
    if prereq:
806
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
807
    else:
808
      raise errors.OpExecError(msg)
809

    
810

    
811
def _GetClusterDomainSecret():
812
  """Reads the cluster domain secret.
813

814
  """
815
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
816
                               strict=True)
817

    
818

    
819
def _CheckInstanceDown(lu, instance, reason):
820
  """Ensure that an instance is not running."""
821
  if instance.admin_up:
822
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
823
                               (instance.name, reason), errors.ECODE_STATE)
824

    
825
  pnode = instance.primary_node
826
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
827
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
828
              prereq=True, ecode=errors.ECODE_ENVIRON)
829

    
830
  if instance.name in ins_l.payload:
831
    raise errors.OpPrereqError("Instance %s is running, %s" %
832
                               (instance.name, reason), errors.ECODE_STATE)
833

    
834

    
835
def _ExpandItemName(fn, name, kind):
836
  """Expand an item name.
837

838
  @param fn: the function to use for expansion
839
  @param name: requested item name
840
  @param kind: text description ('Node' or 'Instance')
841
  @return: the resolved (full) name
842
  @raise errors.OpPrereqError: if the item is not found
843

844
  """
845
  full_name = fn(name)
846
  if full_name is None:
847
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
848
                               errors.ECODE_NOENT)
849
  return full_name
850

    
851

    
852
def _ExpandNodeName(cfg, name):
853
  """Wrapper over L{_ExpandItemName} for nodes."""
854
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
855

    
856

    
857
def _ExpandInstanceName(cfg, name):
858
  """Wrapper over L{_ExpandItemName} for instance."""
859
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
860

    
861

    
862
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
863
                          memory, vcpus, nics, disk_template, disks,
864
                          bep, hvp, hypervisor_name, tags):
865
  """Builds instance related env variables for hooks
866

867
  This builds the hook environment from individual variables.
868

869
  @type name: string
870
  @param name: the name of the instance
871
  @type primary_node: string
872
  @param primary_node: the name of the instance's primary node
873
  @type secondary_nodes: list
874
  @param secondary_nodes: list of secondary nodes as strings
875
  @type os_type: string
876
  @param os_type: the name of the instance's OS
877
  @type status: boolean
878
  @param status: the should_run status of the instance
879
  @type memory: string
880
  @param memory: the memory size of the instance
881
  @type vcpus: string
882
  @param vcpus: the count of VCPUs the instance has
883
  @type nics: list
884
  @param nics: list of tuples (ip, mac, mode, link) representing
885
      the NICs the instance has
886
  @type disk_template: string
887
  @param disk_template: the disk template of the instance
888
  @type disks: list
889
  @param disks: the list of (size, mode) pairs
890
  @type bep: dict
891
  @param bep: the backend parameters for the instance
892
  @type hvp: dict
893
  @param hvp: the hypervisor parameters for the instance
894
  @type hypervisor_name: string
895
  @param hypervisor_name: the hypervisor for the instance
896
  @type tags: list
897
  @param tags: list of instance tags as strings
898
  @rtype: dict
899
  @return: the hook environment for this instance
900

901
  """
902
  if status:
903
    str_status = "up"
904
  else:
905
    str_status = "down"
906
  env = {
907
    "OP_TARGET": name,
908
    "INSTANCE_NAME": name,
909
    "INSTANCE_PRIMARY": primary_node,
910
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
911
    "INSTANCE_OS_TYPE": os_type,
912
    "INSTANCE_STATUS": str_status,
913
    "INSTANCE_MEMORY": memory,
914
    "INSTANCE_VCPUS": vcpus,
915
    "INSTANCE_DISK_TEMPLATE": disk_template,
916
    "INSTANCE_HYPERVISOR": hypervisor_name,
917
  }
918

    
919
  if nics:
920
    nic_count = len(nics)
921
    for idx, (ip, mac, mode, link) in enumerate(nics):
922
      if ip is None:
923
        ip = ""
924
      env["INSTANCE_NIC%d_IP" % idx] = ip
925
      env["INSTANCE_NIC%d_MAC" % idx] = mac
926
      env["INSTANCE_NIC%d_MODE" % idx] = mode
927
      env["INSTANCE_NIC%d_LINK" % idx] = link
928
      if mode == constants.NIC_MODE_BRIDGED:
929
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
930
  else:
931
    nic_count = 0
932

    
933
  env["INSTANCE_NIC_COUNT"] = nic_count
934

    
935
  if disks:
936
    disk_count = len(disks)
937
    for idx, (size, mode) in enumerate(disks):
938
      env["INSTANCE_DISK%d_SIZE" % idx] = size
939
      env["INSTANCE_DISK%d_MODE" % idx] = mode
940
  else:
941
    disk_count = 0
942

    
943
  env["INSTANCE_DISK_COUNT"] = disk_count
944

    
945
  if not tags:
946
    tags = []
947

    
948
  env["INSTANCE_TAGS"] = " ".join(tags)
949

    
950
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
951
    for key, value in source.items():
952
      env["INSTANCE_%s_%s" % (kind, key)] = value
953

    
954
  return env
955
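# Illustrative result (not part of the original module): for a hypothetical
# running instance with one NIC and one disk the returned dictionary contains
# entries along the lines of:
#
#   OP_TARGET=inst1.example.com          INSTANCE_NAME=inst1.example.com
#   INSTANCE_PRIMARY=node1.example.com   INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1                 INSTANCE_NIC0_MAC=aa:00:00:dd:ee:ff
#   INSTANCE_DISK_COUNT=1                INSTANCE_DISK0_SIZE=10240
#
# plus one INSTANCE_BE_* and INSTANCE_HV_* entry per backend and hypervisor
# parameter.  The hooks runner later prefixes every key with "GANETI_".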

    
956

    
957
def _NICListToTuple(lu, nics):
958
  """Build a list of nic information tuples.
959

960
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
961
  value in LUInstanceQueryData.
962

963
  @type lu:  L{LogicalUnit}
964
  @param lu: the logical unit on whose behalf we execute
965
  @type nics: list of L{objects.NIC}
966
  @param nics: list of nics to convert to hooks tuples
967

968
  """
969
  hooks_nics = []
970
  cluster = lu.cfg.GetClusterInfo()
971
  for nic in nics:
972
    ip = nic.ip
973
    mac = nic.mac
974
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
975
    mode = filled_params[constants.NIC_MODE]
976
    link = filled_params[constants.NIC_LINK]
977
    hooks_nics.append((ip, mac, mode, link))
978
  return hooks_nics
979

    
980

    
981
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
982
  """Builds instance related env variables for hooks from an object.
983

984
  @type lu: L{LogicalUnit}
985
  @param lu: the logical unit on whose behalf we execute
986
  @type instance: L{objects.Instance}
987
  @param instance: the instance for which we should build the
988
      environment
989
  @type override: dict
990
  @param override: dictionary with key/values that will override
991
      our values
992
  @rtype: dict
993
  @return: the hook environment dictionary
994

995
  """
996
  cluster = lu.cfg.GetClusterInfo()
997
  bep = cluster.FillBE(instance)
998
  hvp = cluster.FillHV(instance)
999
  args = {
1000
    'name': instance.name,
1001
    'primary_node': instance.primary_node,
1002
    'secondary_nodes': instance.secondary_nodes,
1003
    'os_type': instance.os,
1004
    'status': instance.admin_up,
1005
    'memory': bep[constants.BE_MEMORY],
1006
    'vcpus': bep[constants.BE_VCPUS],
1007
    'nics': _NICListToTuple(lu, instance.nics),
1008
    'disk_template': instance.disk_template,
1009
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1010
    'bep': bep,
1011
    'hvp': hvp,
1012
    'hypervisor_name': instance.hypervisor,
1013
    'tags': instance.tags,
1014
  }
1015
  if override:
1016
    args.update(override)
1017
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1018

    
1019

    
1020
def _AdjustCandidatePool(lu, exceptions):
1021
  """Adjust the candidate pool after node operations.
1022

1023
  """
1024
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1025
  if mod_list:
1026
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1027
               utils.CommaJoin(node.name for node in mod_list))
1028
    for name in mod_list:
1029
      lu.context.ReaddNode(name)
1030
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1031
  if mc_now > mc_max:
1032
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1033
               (mc_now, mc_max))
1034

    
1035

    
1036
def _DecideSelfPromotion(lu, exceptions=None):
1037
  """Decide whether I should promote myself as a master candidate.
1038

1039
  """
1040
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1041
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1042
  # the new node will increase mc_max with one, so:
1043
  mc_should = min(mc_should + 1, cp_size)
1044
  return mc_now < mc_should
1045

    
1046

    
1047
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1048
  """Check that the brigdes needed by a list of nics exist.
1049

1050
  """
1051
  cluster = lu.cfg.GetClusterInfo()
1052
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1053
  brlist = [params[constants.NIC_LINK] for params in paramslist
1054
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1055
  if brlist:
1056
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1057
    result.Raise("Error checking bridges on destination node '%s'" %
1058
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1059

    
1060

    
1061
def _CheckInstanceBridgesExist(lu, instance, node=None):
1062
  """Check that the brigdes needed by an instance exist.
1063

1064
  """
1065
  if node is None:
1066
    node = instance.primary_node
1067
  _CheckNicsBridgesExist(lu, instance.nics, node)
1068

    
1069

    
1070
def _CheckOSVariant(os_obj, name):
1071
  """Check whether an OS name conforms to the os variants specification.
1072

1073
  @type os_obj: L{objects.OS}
1074
  @param os_obj: OS object to check
1075
  @type name: string
1076
  @param name: OS name passed by the user, to check for validity
1077

1078
  """
1079
  if not os_obj.supported_variants:
1080
    return
1081
  variant = objects.OS.GetVariant(name)
1082
  if not variant:
1083
    raise errors.OpPrereqError("OS name must include a variant",
1084
                               errors.ECODE_INVAL)
1085

    
1086
  if variant not in os_obj.supported_variants:
1087
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1088

    
1089

    
1090
def _GetNodeInstancesInner(cfg, fn):
1091
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1092

    
1093

    
1094
def _GetNodeInstances(cfg, node_name):
1095
  """Returns a list of all primary and secondary instances on a node.
1096

1097
  """
1098

    
1099
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1100

    
1101

    
1102
def _GetNodePrimaryInstances(cfg, node_name):
1103
  """Returns primary instances on a node.
1104

1105
  """
1106
  return _GetNodeInstancesInner(cfg,
1107
                                lambda inst: node_name == inst.primary_node)
1108

    
1109

    
1110
def _GetNodeSecondaryInstances(cfg, node_name):
1111
  """Returns secondary instances on a node.
1112

1113
  """
1114
  return _GetNodeInstancesInner(cfg,
1115
                                lambda inst: node_name in inst.secondary_nodes)
1116

    
1117

    
1118
def _GetStorageTypeArgs(cfg, storage_type):
1119
  """Returns the arguments for a storage type.
1120

1121
  """
1122
  # Special case for file storage
1123
  if storage_type == constants.ST_FILE:
1124
    # storage.FileStorage wants a list of storage directories
1125
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1126

    
1127
  return []
1128

    
1129

    
1130
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1131
  faulty = []
1132

    
1133
  for dev in instance.disks:
1134
    cfg.SetDiskID(dev, node_name)
1135

    
1136
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1137
  result.Raise("Failed to get disk status from node %s" % node_name,
1138
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1139

    
1140
  for idx, bdev_status in enumerate(result.payload):
1141
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1142
      faulty.append(idx)
1143

    
1144
  return faulty
1145

    
1146

    
1147
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1148
  """Check the sanity of iallocator and node arguments and use the
1149
  cluster-wide iallocator if appropriate.
1150

1151
  Check that at most one of (iallocator, node) is specified. If none is
1152
  specified, then the LU's opcode's iallocator slot is filled with the
1153
  cluster-wide default iallocator.
1154

1155
  @type iallocator_slot: string
1156
  @param iallocator_slot: the name of the opcode iallocator slot
1157
  @type node_slot: string
1158
  @param node_slot: the name of the opcode target node slot
1159

1160
  """
1161
  node = getattr(lu.op, node_slot, None)
1162
  iallocator = getattr(lu.op, iallocator_slot, None)
1163

    
1164
  if node is not None and iallocator is not None:
1165
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1166
                               errors.ECODE_INVAL)
1167
  elif node is None and iallocator is None:
1168
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1169
    if default_iallocator:
1170
      setattr(lu.op, iallocator_slot, default_iallocator)
1171
    else:
1172
      raise errors.OpPrereqError("No iallocator or node given and no"
1173
                                 " cluster-wide default iallocator found;"
1174
                                 " please specify either an iallocator or a"
1175
                                 " node, or set a cluster-wide default"
1176
                                 " iallocator")
1177

    
1178

    
1179
class LUClusterPostInit(LogicalUnit):
1180
  """Logical unit for running hooks after cluster initialization.
1181

1182
  """
1183
  HPATH = "cluster-init"
1184
  HTYPE = constants.HTYPE_CLUSTER
1185

    
1186
  def BuildHooksEnv(self):
1187
    """Build hooks env.
1188

1189
    """
1190
    return {
1191
      "OP_TARGET": self.cfg.GetClusterName(),
1192
      }
1193

    
1194
  def BuildHooksNodes(self):
1195
    """Build hooks nodes.
1196

1197
    """
1198
    return ([], [self.cfg.GetMasterNode()])
1199

    
1200
  def Exec(self, feedback_fn):
1201
    """Nothing to do.
1202

1203
    """
1204
    return True
1205

    
1206

    
1207
class LUClusterDestroy(LogicalUnit):
1208
  """Logical unit for destroying the cluster.
1209

1210
  """
1211
  HPATH = "cluster-destroy"
1212
  HTYPE = constants.HTYPE_CLUSTER
1213

    
1214
  def BuildHooksEnv(self):
1215
    """Build hooks env.
1216

1217
    """
1218
    return {
1219
      "OP_TARGET": self.cfg.GetClusterName(),
1220
      }
1221

    
1222
  def BuildHooksNodes(self):
1223
    """Build hooks nodes.
1224

1225
    """
1226
    return ([], [])
1227

    
1228
  def CheckPrereq(self):
1229
    """Check prerequisites.
1230

1231
    This checks whether the cluster is empty.
1232

1233
    Any errors are signaled by raising errors.OpPrereqError.
1234

1235
    """
1236
    master = self.cfg.GetMasterNode()
1237

    
1238
    nodelist = self.cfg.GetNodeList()
1239
    if len(nodelist) != 1 or nodelist[0] != master:
1240
      raise errors.OpPrereqError("There are still %d node(s) in"
1241
                                 " this cluster." % (len(nodelist) - 1),
1242
                                 errors.ECODE_INVAL)
1243
    instancelist = self.cfg.GetInstanceList()
1244
    if instancelist:
1245
      raise errors.OpPrereqError("There are still %d instance(s) in"
1246
                                 " this cluster." % len(instancelist),
1247
                                 errors.ECODE_INVAL)
1248

    
1249
  def Exec(self, feedback_fn):
1250
    """Destroys the cluster.
1251

1252
    """
1253
    master = self.cfg.GetMasterNode()
1254

    
1255
    # Run post hooks on master node before it's removed
1256
    _RunPostHook(self, master)
1257

    
1258
    result = self.rpc.call_node_stop_master(master, False)
1259
    result.Raise("Could not disable the master role")
1260

    
1261
    return master
1262

    
1263

    
1264
def _VerifyCertificate(filename):
1265
  """Verifies a certificate for LUClusterVerifyConfig.
1266

1267
  @type filename: string
1268
  @param filename: Path to PEM file
1269

1270
  """
1271
  try:
1272
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1273
                                           utils.ReadFile(filename))
1274
  except Exception, err: # pylint: disable-msg=W0703
1275
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1276
            "Failed to load X509 certificate %s: %s" % (filename, err))
1277

    
1278
  (errcode, msg) = \
1279
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1280
                                constants.SSL_CERT_EXPIRATION_ERROR)
1281

    
1282
  if msg:
1283
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1284
  else:
1285
    fnamemsg = None
1286

    
1287
  if errcode is None:
1288
    return (None, fnamemsg)
1289
  elif errcode == utils.CERT_WARNING:
1290
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1291
  elif errcode == utils.CERT_ERROR:
1292
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1293

    
1294
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1295

    
1296

    
1297
def _GetAllHypervisorParameters(cluster, instances):
1298
  """Compute the set of all hypervisor parameters.
1299

1300
  @type cluster: L{objects.Cluster}
1301
  @param cluster: the cluster object
1302
  @param instances: list of L{objects.Instance}
1303
  @param instances: additional instances from which to obtain parameters
1304
  @rtype: list of (origin, hypervisor, parameters)
1305
  @return: a list with all parameters found, indicating the hypervisor they
1306
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1307

1308
  """
1309
  hvp_data = []
1310

    
1311
  for hv_name in cluster.enabled_hypervisors:
1312
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1313

    
1314
  for os_name, os_hvp in cluster.os_hvp.items():
1315
    for hv_name, hv_params in os_hvp.items():
1316
      if hv_params:
1317
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1318
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1319

    
1320
  # TODO: collapse identical parameter values in a single one
1321
  for instance in instances:
1322
    if instance.hvparams:
1323
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1324
                       cluster.FillHV(instance)))
1325

    
1326
  return hvp_data
1327

    
1328

    
1329
class _VerifyErrors(object):
1330
  """Mix-in for cluster/group verify LUs.
1331

1332
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1333
  self.op and self._feedback_fn to be available.)
1334

1335
  """
1336
  TCLUSTER = "cluster"
1337
  TNODE = "node"
1338
  TINSTANCE = "instance"
1339

    
1340
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1341
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1342
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1343
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1344
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1345
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1346
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1347
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1348
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1349
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1350
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1351
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1352
  ENODEDRBD = (TNODE, "ENODEDRBD")
1353
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1354
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1355
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1356
  ENODEHV = (TNODE, "ENODEHV")
1357
  ENODELVM = (TNODE, "ENODELVM")
1358
  ENODEN1 = (TNODE, "ENODEN1")
1359
  ENODENET = (TNODE, "ENODENET")
1360
  ENODEOS = (TNODE, "ENODEOS")
1361
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1362
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1363
  ENODERPC = (TNODE, "ENODERPC")
1364
  ENODESSH = (TNODE, "ENODESSH")
1365
  ENODEVERSION = (TNODE, "ENODEVERSION")
1366
  ENODESETUP = (TNODE, "ENODESETUP")
1367
  ENODETIME = (TNODE, "ENODETIME")
1368
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1369

    
1370
  ETYPE_FIELD = "code"
1371
  ETYPE_ERROR = "ERROR"
1372
  ETYPE_WARNING = "WARNING"
1373

    
1374
  def _Error(self, ecode, item, msg, *args, **kwargs):
1375
    """Format an error message.
1376

1377
    Based on the opcode's error_codes parameter, either format a
1378
    parseable error code, or a simpler error string.
1379

1380
    This must be called only from Exec and functions called from Exec.
1381

1382
    """
1383
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1384
    itype, etxt = ecode
1385
    # first complete the msg
1386
    if args:
1387
      msg = msg % args
1388
    # then format the whole message
1389
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1390
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1391
    else:
1392
      if item:
1393
        item = " " + item
1394
      else:
1395
        item = ""
1396
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1397
    # and finally report it via the feedback_fn
1398
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1399

    
1400
  def _ErrorIf(self, cond, *args, **kwargs):
1401
    """Log an error message if the passed condition is True.
1402

1403
    """
1404
    cond = (bool(cond)
1405
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1406
    if cond:
1407
      self._Error(*args, **kwargs)
1408
    # do not mark the operation as failed for WARN cases only
1409
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1410
      self.bad = self.bad or cond
1411
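# Illustrative output (not part of the original module): for a hypothetical
# unreachable node, self._ErrorIf(True, self.ENODERPC, "node2.example.com",
# "rpc failure") feeds back either
#
#   - ERROR: node node2.example.com: rpc failure
#
# or, if the opcode's error_codes flag is set, the parseable form
#
#   - ERROR:ENODERPC:node:node2.example.com:rpc failure
#
# Only ETYPE_ERROR entries mark the verification as failed; ETYPE_WARNING
# entries are reported but do not set self.bad.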

    
1412

    
1413
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1414
  """Verifies the cluster config.
1415

1416
  """
1417
  REQ_BGL = False
1418

    
1419
  def _VerifyHVP(self, hvp_data):
1420
    """Verifies locally the syntax of the hypervisor parameters.
1421

1422
    """
1423
    for item, hv_name, hv_params in hvp_data:
1424
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1425
             (hv_name, item))
1426
      try:
1427
        hv_class = hypervisor.GetHypervisor(hv_name)
1428
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1429
        hv_class.CheckParameterSyntax(hv_params)
1430
      except errors.GenericError, err:
1431
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1432

    
1433
  def ExpandNames(self):
1434
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1435
    self.all_node_info = self.cfg.GetAllNodesInfo()
1436
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1437
    self.needed_locks = {}
1438

    
1439
  def Exec(self, feedback_fn):
1440
    """Verify integrity of cluster, performing various test on nodes.
1441

1442
    """
1443
    self.bad = False
1444
    self._feedback_fn = feedback_fn
1445

    
1446
    feedback_fn("* Verifying cluster config")
1447

    
1448
    for msg in self.cfg.VerifyConfig():
1449
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1450

    
1451
    feedback_fn("* Verifying cluster certificate files")
1452

    
1453
    for cert_filename in constants.ALL_CERT_FILES:
1454
      (errcode, msg) = _VerifyCertificate(cert_filename)
1455
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1456

    
1457
    feedback_fn("* Verifying hypervisor parameters")
1458

    
1459
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1460
                                                self.all_inst_info.values()))
1461

    
1462
    feedback_fn("* Verifying all nodes belong to an existing group")
1463

    
1464
    # We do this verification here because, should this bogus circumstance
1465
    # occur, it would never be catched by VerifyGroup, which only acts on
1466
    # nodes/instances reachable from existing node groups.
1467

    
1468
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1469
                         if node.group not in self.all_group_info)
1470

    
1471
    dangling_instances = {}
1472
    no_node_instances = []
1473

    
1474
    for inst in self.all_inst_info.values():
1475
      if inst.primary_node in dangling_nodes:
1476
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1477
      elif inst.primary_node not in self.all_node_info:
1478
        no_node_instances.append(inst.name)
1479

    
1480
    pretty_dangling = [
1481
        "%s (%s)" %
1482
        (node.name,
1483
         utils.CommaJoin(dangling_instances.get(node.name,
1484
                                                ["no instances"])))
1485
        for node in dangling_nodes]
1486

    
1487
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1488
                  "the following nodes (and their instances) belong to a non"
1489
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1490

    
1491
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1492
                  "the following instances have a non-existing primary-node:"
1493
                  " %s", utils.CommaJoin(no_node_instances))
1494

    
1495
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1496

    
1497

    
1498
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1499
  """Verifies the status of a node group.
1500

1501
  """
1502
  HPATH = "cluster-verify"
1503
  HTYPE = constants.HTYPE_CLUSTER
1504
  REQ_BGL = False
1505

    
1506
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1507

    
1508
  class NodeImage(object):
1509
    """A class representing the logical and physical status of a node.
1510

1511
    @type name: string
1512
    @ivar name: the node name to which this object refers
1513
    @ivar volumes: a structure as returned from
1514
        L{ganeti.backend.GetVolumeList} (runtime)
1515
    @ivar instances: a list of running instances (runtime)
1516
    @ivar pinst: list of configured primary instances (config)
1517
    @ivar sinst: list of configured secondary instances (config)
1518
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1519
        instances for which this node is secondary (config)
1520
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1521
    @ivar dfree: free disk, as reported by the node (runtime)
1522
    @ivar offline: the offline status (config)
1523
    @type rpc_fail: boolean
1524
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1525
        not whether the individual keys were correct) (runtime)
1526
    @type lvm_fail: boolean
1527
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1528
    @type hyp_fail: boolean
1529
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1530
    @type ghost: boolean
1531
    @ivar ghost: whether this is a known node or not (config)
1532
    @type os_fail: boolean
1533
    @ivar os_fail: whether the RPC call didn't return valid OS data
1534
    @type oslist: list
1535
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1536
    @type vm_capable: boolean
1537
    @ivar vm_capable: whether the node can host instances
1538

1539
    """
1540
    def __init__(self, offline=False, name=None, vm_capable=True):
1541
      self.name = name
1542
      self.volumes = {}
1543
      self.instances = []
1544
      self.pinst = []
1545
      self.sinst = []
1546
      self.sbp = {}
1547
      self.mfree = 0
1548
      self.dfree = 0
1549
      self.offline = offline
1550
      self.vm_capable = vm_capable
1551
      self.rpc_fail = False
1552
      self.lvm_fail = False
1553
      self.hyp_fail = False
1554
      self.ghost = False
1555
      self.os_fail = False
1556
      self.oslist = {}
1557

    
1558
  def ExpandNames(self):
1559
    # This raises errors.OpPrereqError on its own:
1560
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1561

    
1562
    all_node_info = self.cfg.GetAllNodesInfo()
1563
    all_inst_info = self.cfg.GetAllInstancesInfo()
1564

    
1565
    node_names = set(node.name
1566
                     for node in all_node_info.values()
1567
                     if node.group == self.group_uuid)
1568

    
1569
    inst_names = [inst.name
1570
                  for inst in all_inst_info.values()
1571
                  if inst.primary_node in node_names]
1572

    
1573
    # In Exec(), we warn about mirrored instances that have primary and
1574
    # secondary living in separate node groups. To fully verify that
1575
    # volumes for these instances are healthy, we will need to do an
1576
    # extra call to their secondaries. We ensure here those nodes will
1577
    # be locked.
1578
    for inst in inst_names:
1579
      if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1580
        node_names.update(all_inst_info[inst].secondary_nodes)
1581

    
1582
    self.needed_locks = {
1583
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1584
      locking.LEVEL_NODE: list(node_names),
1585
      locking.LEVEL_INSTANCE: inst_names,
1586
    }
1587

    
1588
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
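
    # All locks are acquired in shared mode (value 1 above); verification
    # only reads the configuration and node state.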

  def CheckPrereq(self):
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    group_nodes = set(node.name
                      for node in self.all_node_info.values()
                      if node.group == self.group_uuid)

    group_instances = set(inst.name
                          for inst in self.all_inst_info.values()
                          if inst.primary_node in group_nodes)

    unlocked_nodes = \
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
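
    # The start/end timestamps bracket the verify RPC, so a node whose clock
    # is more than NODE_MAX_CLOCK_SKEW outside that window is definitely
    # skewed; smaller offsets cannot be told apart from RPC latency.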

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]
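
    # diskdata flattens the per-node diskstatus mapping into tuples of
    # (node_name, success, status, disk_index).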

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
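
        # Worked example: if this node is secondary for two auto-balanced
        # instances of 2048 MiB each sharing a primary node, it needs at
        # least 4096 MiB free to pass the check for that primary.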

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])
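
    # file2nodefn maps each expected filename to a predicate selecting the
    # nodes that must have it; None means every node is considered.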

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no nodes (not"
                " found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)
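
    # node_drbd now maps each DRBD minor on this node to a tuple of
    # (instance name, whether the minor is expected to be in use).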

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
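        # instdisk maps instance name -> node name -> list of per-disk
        # (success, payload) tuples.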

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    assert self.my_node_names, ("Node list not gathered,"
      " has CheckPrereq been executed?")
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }
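
    # node_verify_param maps NV_* check names to their arguments (None for
    # checks that need none); it is passed to call_node_verify below and the
    # per-check results come back keyed by the same constants.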

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode
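          # Nodes referenced by this instance but outside the group (or not
          # known to the configuration at all, hence the "ghost" flag) still
          # get a NodeImage so later checks can look them up safely.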

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}
    nvinfo_endtime = time.time()
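
    # This [nvinfo_starttime, nvinfo_endtime] window is what _VerifyNodeTime
    # compares each node's reported clock against.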

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2795

    
2796
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2797
    """Analyze the post-hooks' result
2798

2799
    This method analyses the hook result, handles it, and sends some
2800
    nicely-formatted feedback back to the user.
2801

2802
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2803
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2804
    @param hooks_results: the results of the multi-node hooks rpc call
2805
    @param feedback_fn: function used send feedback back to the caller
2806
    @param lu_result: previous Exec result
2807
    @return: the new Exec result, based on the previous result
2808
        and hook results
2809

2810
    """
2811
    # We only really run POST phase hooks, and are only interested in
2812
    # their results
2813
    if phase == constants.HOOKS_PHASE_POST:
2814
      # Used to change hooks' output to proper indentation
2815
      feedback_fn("* Hooks Results")
2816
      assert hooks_results, "invalid result from hooks"
2817

    
2818
      for node_name in hooks_results:
2819
        res = hooks_results[node_name]
2820
        msg = res.fail_msg
2821
        test = msg and not res.offline
2822
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2823
                      "Communication failure in hooks execution: %s", msg)
2824
        if res.offline or msg:
2825
          # No need to investigate payload if node is offline or gave an error.
2826
          # manually override lu_result here, as _ErrorIf only
2827
          # overrides self.bad
2828
          lu_result = 1
2829
          continue
2830
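        # Each entry in res.payload is a (script, hook_status, output) tuple;
        # a status of constants.HKR_FAIL marks a failed hook script, whose
        # output is re-indented below and shown to the user.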
        for script, hkr, output in res.payload:
2831
          test = hkr == constants.HKR_FAIL
2832
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2833
                        "Script %s failed, output:", script)
2834
          if test:
2835
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2836
            feedback_fn("%s" % output)
2837
            lu_result = 0
2838

    
2839
      return lu_result
2840

    
2841

    
2842
class LUClusterVerifyDisks(NoHooksLU):
2843
  """Verifies the cluster disks status.
2844

2845
  """
2846
  REQ_BGL = False
2847

    
2848
  def ExpandNames(self):
2849
    self.needed_locks = {
2850
      locking.LEVEL_NODE: locking.ALL_SET,
2851
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2852
    }
2853
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2854

    
2855
  def Exec(self, feedback_fn):
2856
    """Verify integrity of cluster disks.
2857

2858
    @rtype: tuple of three items
2859
    @return: a tuple of (dict of node-to-node_error, list of instances
2860
        which need activate-disks, dict of instance: (node, volume) for
2861
        missing volumes)
2862

2863
    """
2864
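    # Sketch of a possible return value (all names purely illustrative):
    #   ({"node3": "Error enumerating LVs ..."},     # nodes with RPC errors
    #    ["instance5"],                              # needs activate-disks
    #    {"instance7": [("node1", "xenvg/disk0")]})  # missing volumes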
    result = res_nodes, res_instances, res_missing = {}, [], {}
2865

    
2866
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2867
    instances = self.cfg.GetAllInstancesInfo().values()
2868

    
2869
    nv_dict = {}
2870
    for inst in instances:
2871
      inst_lvs = {}
2872
      if not inst.admin_up:
2873
        continue
2874
      inst.MapLVsByNode(inst_lvs)
2875
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2876
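      # e.g. {"inst1": {"node1": ["xenvg/lv1", "xenvg/lv2"]}} becomes
      # {("node1", "xenvg/lv1"): inst1, ("node1", "xenvg/lv2"): inst1}
      # (volume names here are purely illustrative)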
      for node, vol_list in inst_lvs.iteritems():
2877
        for vol in vol_list:
2878
          nv_dict[(node, vol)] = inst
2879

    
2880
    if not nv_dict:
2881
      return result
2882

    
2883
    node_lvs = self.rpc.call_lv_list(nodes, [])
2884
    for node, node_res in node_lvs.items():
2885
      if node_res.offline:
2886
        continue
2887
      msg = node_res.fail_msg
2888
      if msg:
2889
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2890
        res_nodes[node] = msg
2891
        continue
2892

    
2893
      lvs = node_res.payload
2894
      for lv_name, (_, _, lv_online) in lvs.items():
2895
        inst = nv_dict.pop((node, lv_name), None)
2896
        if (not lv_online and inst is not None
2897
            and inst.name not in res_instances):
2898
          res_instances.append(inst.name)
2899

    
2900
    # any leftover items in nv_dict are missing LVs, let's arrange the
2901
    # data better
2902
    for key, inst in nv_dict.iteritems():
2903
      if inst.name not in res_missing:
2904
        res_missing[inst.name] = []
2905
      res_missing[inst.name].append(key)
2906

    
2907
    return result
2908

    
2909

    
2910
class LUClusterRepairDiskSizes(NoHooksLU):
2911
  """Verifies the cluster disks sizes.
2912

2913
  """
2914
  REQ_BGL = False
2915

    
2916
  def ExpandNames(self):
2917
    if self.op.instances:
2918
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
2919
      self.needed_locks = {
2920
        locking.LEVEL_NODE: [],
2921
        locking.LEVEL_INSTANCE: self.wanted_names,
2922
        }
2923
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2924
    else:
2925
      self.wanted_names = None
2926
      self.needed_locks = {
2927
        locking.LEVEL_NODE: locking.ALL_SET,
2928
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2929
        }
2930
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2931

    
2932
  def DeclareLocks(self, level):
2933
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2934
      self._LockInstancesNodes(primary_only=True)
2935

    
2936
  def CheckPrereq(self):
2937
    """Check prerequisites.
2938

2939
    This only checks the optional instance list against the existing names.
2940

2941
    """
2942
    if self.wanted_names is None:
2943
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2944

    
2945
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2946
                             in self.wanted_names]
2947

    
2948
  def _EnsureChildSizes(self, disk):
2949
    """Ensure children of the disk have the needed disk size.
2950

2951
    This is valid mainly for DRBD8 and fixes an issue where the
2952
    children have smaller disk size.
2953

2954
    @param disk: an L{ganeti.objects.Disk} object
2955

2956
    """
2957
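    # Illustrative use (hypothetical sizes): for a 10240 MiB DRBD8 disk whose
    # data child was recorded as 10112 MiB, this bumps the child to 10240 and
    # returns True so the caller knows the configuration must be updated.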
    if disk.dev_type == constants.LD_DRBD8:
2958
      assert disk.children, "Empty children for DRBD8?"
2959
      fchild = disk.children[0]
2960
      mismatch = fchild.size < disk.size
2961
      if mismatch:
2962
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2963
                     fchild.size, disk.size)
2964
        fchild.size = disk.size
2965

    
2966
      # and we recurse on this child only, not on the metadev
2967
      return self._EnsureChildSizes(fchild) or mismatch
2968
    else:
2969
      return False
2970

    
2971
  def Exec(self, feedback_fn):
2972
    """Verify the size of cluster disks.
2973

2974
    """
2975
    # TODO: check child disks too
2976
    # TODO: check differences in size between primary/secondary nodes
2977
    per_node_disks = {}
2978
    for instance in self.wanted_instances:
2979
      pnode = instance.primary_node
2980
      if pnode not in per_node_disks:
2981
        per_node_disks[pnode] = []
2982
      for idx, disk in enumerate(instance.disks):
2983
        per_node_disks[pnode].append((instance, idx, disk))
2984

    
2985
    changed = []
2986
    for node, dskl in per_node_disks.items():
2987
      newl = [v[2].Copy() for v in dskl]
2988
      for dsk in newl:
2989
        self.cfg.SetDiskID(dsk, node)
2990
      result = self.rpc.call_blockdev_getsize(node, newl)
2991
      if result.fail_msg:
2992
        self.LogWarning("Failure in blockdev_getsize call to node"
2993
                        " %s, ignoring", node)
2994
        continue
2995
      if len(result.payload) != len(dskl):
2996
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2997
                        " result.payload=%s", node, len(dskl), result.payload)
2998
        self.LogWarning("Invalid result from node %s, ignoring node results",
2999
                        node)
3000
        continue
3001
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3002
        if size is None:
3003
          self.LogWarning("Disk %d of instance %s did not return size"
3004
                          " information, ignoring", idx, instance.name)
3005
          continue
3006
        if not isinstance(size, (int, long)):
3007
          self.LogWarning("Disk %d of instance %s did not return valid"
3008
                          " size information, ignoring", idx, instance.name)
3009
          continue
3010
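        # the RPC reports the size in bytes; shift by 20 bits to get MiB,
        # the unit recorded in disk.size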
        size = size >> 20
3011
        if size != disk.size:
3012
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3013
                       " correcting: recorded %d, actual %d", idx,
3014
                       instance.name, disk.size, size)
3015
          disk.size = size
3016
          self.cfg.Update(instance, feedback_fn)
3017
          changed.append((instance.name, idx, size))
3018
        if self._EnsureChildSizes(disk):
3019
          self.cfg.Update(instance, feedback_fn)
3020
          changed.append((instance.name, idx, disk.size))
3021
    return changed
3022

    
3023

    
3024
class LUClusterRename(LogicalUnit):
3025
  """Rename the cluster.
3026

3027
  """
3028
  HPATH = "cluster-rename"
3029
  HTYPE = constants.HTYPE_CLUSTER
3030

    
3031
  def BuildHooksEnv(self):
3032
    """Build hooks env.
3033

3034
    """
3035
    return {
3036
      "OP_TARGET": self.cfg.GetClusterName(),
3037
      "NEW_NAME": self.op.name,
3038
      }
3039

    
3040
  def BuildHooksNodes(self):
3041
    """Build hooks nodes.
3042

3043
    """
3044
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3045

    
3046
  def CheckPrereq(self):
3047
    """Verify that the passed name is a valid one.
3048

3049
    """
3050
    hostname = netutils.GetHostname(name=self.op.name,
3051
                                    family=self.cfg.GetPrimaryIPFamily())
3052

    
3053
    new_name = hostname.name
3054
    self.ip = new_ip = hostname.ip
3055
    old_name = self.cfg.GetClusterName()
3056
    old_ip = self.cfg.GetMasterIP()
3057
    if new_name == old_name and new_ip == old_ip:
3058
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3059
                                 " cluster has changed",
3060
                                 errors.ECODE_INVAL)
3061
    if new_ip != old_ip:
3062
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3063
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3064
                                   " reachable on the network" %
3065
                                   new_ip, errors.ECODE_NOTUNIQUE)
3066

    
3067
    self.op.name = new_name
3068

    
3069
  def Exec(self, feedback_fn):
3070
    """Rename the cluster.
3071

3072
    """
3073
    clustername = self.op.name
3074
    ip = self.ip
3075

    
3076
    # shutdown the master IP
3077
    master = self.cfg.GetMasterNode()
3078
    result = self.rpc.call_node_stop_master(master, False)
3079
    result.Raise("Could not disable the master role")
3080

    
3081
    try:
3082
      cluster = self.cfg.GetClusterInfo()
3083
      cluster.cluster_name = clustername
3084
      cluster.master_ip = ip
3085
      self.cfg.Update(cluster, feedback_fn)
3086

    
3087
      # update the known hosts file
3088
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3089
      node_list = self.cfg.GetOnlineNodeList()
3090
      try:
3091
        node_list.remove(master)
3092
      except ValueError:
3093
        pass
3094
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3095
    finally:
3096
      result = self.rpc.call_node_start_master(master, False, False)
3097
      msg = result.fail_msg
3098
      if msg:
3099
        self.LogWarning("Could not re-enable the master role on"
3100
                        " the master, please restart manually: %s", msg)
3101

    
3102
    return clustername
3103

    
3104

    
3105
class LUClusterSetParams(LogicalUnit):
3106
  """Change the parameters of the cluster.
3107

3108
  """
3109
  HPATH = "cluster-modify"
3110
  HTYPE = constants.HTYPE_CLUSTER
3111
  REQ_BGL = False
3112

    
3113
  def CheckArguments(self):
3114
    """Check parameters
3115

3116
    """
3117
    if self.op.uid_pool:
3118
      uidpool.CheckUidPool(self.op.uid_pool)
3119

    
3120
    if self.op.add_uids:
3121
      uidpool.CheckUidPool(self.op.add_uids)
3122

    
3123
    if self.op.remove_uids:
3124
      uidpool.CheckUidPool(self.op.remove_uids)
3125

    
3126
  def ExpandNames(self):
3127
    # FIXME: in the future maybe other cluster params won't require checking on
3128
    # all nodes to be modified.
3129
    self.needed_locks = {
3130
      locking.LEVEL_NODE: locking.ALL_SET,
3131
    }
3132
    self.share_locks[locking.LEVEL_NODE] = 1
3133

    
3134
  def BuildHooksEnv(self):
3135
    """Build hooks env.
3136

3137
    """
3138
    return {
3139
      "OP_TARGET": self.cfg.GetClusterName(),
3140
      "NEW_VG_NAME": self.op.vg_name,
3141
      }
3142

    
3143
  def BuildHooksNodes(self):
3144
    """Build hooks nodes.
3145

3146
    """
3147
    mn = self.cfg.GetMasterNode()
3148
    return ([mn], [mn])
3149

    
3150
  def CheckPrereq(self):
3151
    """Check prerequisites.
3152

3153
    This checks whether the given params don't conflict and
3154
    whether the given volume group is valid.
3155

3156
    """
3157
    if self.op.vg_name is not None and not self.op.vg_name:
3158
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3159
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3160
                                   " instances exist", errors.ECODE_INVAL)
3161

    
3162
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3163
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3164
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3165
                                   " drbd-based instances exist",
3166
                                   errors.ECODE_INVAL)
3167

    
3168
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3169

    
3170
    # if vg_name not None, checks given volume group on all nodes
3171
    if self.op.vg_name:
3172
      vglist = self.rpc.call_vg_list(node_list)
3173
      for node in node_list:
3174
        msg = vglist[node].fail_msg
3175
        if msg:
3176
          # ignoring down node
3177
          self.LogWarning("Error while gathering data on node %s"
3178
                          " (ignoring node): %s", node, msg)
3179
          continue
3180
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3181
                                              self.op.vg_name,
3182
                                              constants.MIN_VG_SIZE)
3183
        if vgstatus:
3184
          raise errors.OpPrereqError("Error on node '%s': %s" %
3185
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3186

    
3187
    if self.op.drbd_helper:
3188
      # checks given drbd helper on all nodes
3189
      helpers = self.rpc.call_drbd_helper(node_list)
3190
      for node in node_list:
3191
        ninfo = self.cfg.GetNodeInfo(node)
3192
        if ninfo.offline:
3193
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3194
          continue
3195
        msg = helpers[node].fail_msg
3196
        if msg:
3197
          raise errors.OpPrereqError("Error checking drbd helper on node"
3198
                                     " '%s': %s" % (node, msg),
3199
                                     errors.ECODE_ENVIRON)
3200
        node_helper = helpers[node].payload
3201
        if node_helper != self.op.drbd_helper:
3202
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3203
                                     (node, node_helper), errors.ECODE_ENVIRON)
3204

    
3205
    self.cluster = cluster = self.cfg.GetClusterInfo()
3206
    # validate params changes
3207
    if self.op.beparams:
3208
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3209
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3210

    
3211
    if self.op.ndparams:
3212
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3213
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3214

    
3215
      # TODO: we need a more general way to handle resetting
3216
      # cluster-level parameters to default values
3217
      if self.new_ndparams["oob_program"] == "":
3218
        self.new_ndparams["oob_program"] = \
3219
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3220

    
3221
    if self.op.nicparams:
3222
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3223
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3224
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3225
      nic_errors = []
3226

    
3227
      # check all instances for consistency
3228
      for instance in self.cfg.GetAllInstancesInfo().values():
3229
        for nic_idx, nic in enumerate(instance.nics):
3230
          params_copy = copy.deepcopy(nic.nicparams)
3231
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3232

    
3233
          # check parameter syntax
3234
          try:
3235
            objects.NIC.CheckParameterSyntax(params_filled)
3236
          except errors.ConfigurationError, err:
3237
            nic_errors.append("Instance %s, nic/%d: %s" %
3238
                              (instance.name, nic_idx, err))
3239

    
3240
          # if we're moving instances to routed, check that they have an ip
3241
          target_mode = params_filled[constants.NIC_MODE]
3242
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3243
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3244
                              " address" % (instance.name, nic_idx))
3245
      if nic_errors:
3246
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3247
                                   "\n".join(nic_errors))
3248

    
3249
    # hypervisor list/parameters
3250
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3251
    if self.op.hvparams:
3252
      for hv_name, hv_dict in self.op.hvparams.items():
3253
        if hv_name not in self.new_hvparams:
3254
          self.new_hvparams[hv_name] = hv_dict
3255
        else:
3256
          self.new_hvparams[hv_name].update(hv_dict)
3257

    
3258
    # os hypervisor parameters
3259
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3260
    if self.op.os_hvp:
3261
      for os_name, hvs in self.op.os_hvp.items():
3262
        if os_name not in self.new_os_hvp:
3263
          self.new_os_hvp[os_name] = hvs
3264
        else:
3265
          for hv_name, hv_dict in hvs.items():
3266
            if hv_name not in self.new_os_hvp[os_name]:
3267
              self.new_os_hvp[os_name][hv_name] = hv_dict
3268
            else:
3269
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3270

    
3271
    # os parameters
3272
    self.new_osp = objects.FillDict(cluster.osparams, {})
3273
    if self.op.osparams:
3274
      for os_name, osp in self.op.osparams.items():
3275
        if os_name not in self.new_osp:
3276
          self.new_osp[os_name] = {}
3277

    
3278
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3279
                                                  use_none=True)
3280

    
3281
        if not self.new_osp[os_name]:
3282
          # we removed all parameters
3283
          del self.new_osp[os_name]
3284
        else:
3285
          # check the parameter validity (remote check)
3286
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3287
                         os_name, self.new_osp[os_name])
3288

    
3289
    # changes to the hypervisor list
3290
    if self.op.enabled_hypervisors is not None:
3291
      self.hv_list = self.op.enabled_hypervisors
3292
      for hv in self.hv_list:
3293
        # if the hypervisor doesn't already exist in the cluster
3294
        # hvparams, we initialize it to empty, and then (in both
3295
        # cases) we make sure to fill the defaults, as we might not
3296
        # have a complete defaults list if the hypervisor wasn't
3297
        # enabled before
3298
        if hv not in new_hvp:
3299
          new_hvp[hv] = {}
3300
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3301
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3302
    else:
3303
      self.hv_list = cluster.enabled_hypervisors
3304

    
3305
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3306
      # either the enabled list has changed, or the parameters have, validate
3307
      for hv_name, hv_params in self.new_hvparams.items():
3308
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3309
            (self.op.enabled_hypervisors and
3310
             hv_name in self.op.enabled_hypervisors)):
3311
          # either this is a new hypervisor, or its parameters have changed
3312
          hv_class = hypervisor.GetHypervisor(hv_name)
3313
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3314
          hv_class.CheckParameterSyntax(hv_params)
3315
          _CheckHVParams(self, node_list, hv_name, hv_params)
3316

    
3317
    if self.op.os_hvp:
3318
      # no need to check any newly-enabled hypervisors, since the
3319
      # defaults have already been checked in the above code-block
3320
      for os_name, os_hvp in self.new_os_hvp.items():
3321
        for hv_name, hv_params in os_hvp.items():
3322
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3323
          # we need to fill in the new os_hvp on top of the actual hv_p
3324
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3325
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3326
          hv_class = hypervisor.GetHypervisor(hv_name)
3327
          hv_class.CheckParameterSyntax(new_osp)
3328
          _CheckHVParams(self, node_list, hv_name, new_osp)
3329

    
3330
    if self.op.default_iallocator:
3331
      alloc_script = utils.FindFile(self.op.default_iallocator,
3332
                                    constants.IALLOCATOR_SEARCH_PATH,
3333
                                    os.path.isfile)
3334
      if alloc_script is None:
3335
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3336
                                   " specified" % self.op.default_iallocator,
3337
                                   errors.ECODE_INVAL)
3338

    
3339
  def Exec(self, feedback_fn):
3340
    """Change the parameters of the cluster.
3341

3342
    """
3343
    if self.op.vg_name is not None:
3344
      new_volume = self.op.vg_name
3345
      if not new_volume:
3346
        new_volume = None
3347
      if new_volume != self.cfg.GetVGName():
3348
        self.cfg.SetVGName(new_volume)
3349
      else:
3350
        feedback_fn("Cluster LVM configuration already in desired"
3351
                    " state, not changing")
3352
    if self.op.drbd_helper is not None:
3353
      new_helper = self.op.drbd_helper
3354
      if not new_helper:
3355
        new_helper = None
3356
      if new_helper != self.cfg.GetDRBDHelper():
3357
        self.cfg.SetDRBDHelper(new_helper)
3358
      else:
3359
        feedback_fn("Cluster DRBD helper already in desired state,"
3360
                    " not changing")
3361
    if self.op.hvparams:
3362
      self.cluster.hvparams = self.new_hvparams
3363
    if self.op.os_hvp:
3364
      self.cluster.os_hvp = self.new_os_hvp
3365
    if self.op.enabled_hypervisors is not None:
3366
      self.cluster.hvparams = self.new_hvparams
3367
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3368
    if self.op.beparams:
3369
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3370
    if self.op.nicparams:
3371
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3372
    if self.op.osparams:
3373
      self.cluster.osparams = self.new_osp
3374
    if self.op.ndparams:
3375
      self.cluster.ndparams = self.new_ndparams
3376

    
3377
    if self.op.candidate_pool_size is not None:
3378
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3379
      # we need to update the pool size here, otherwise the save will fail
3380
      _AdjustCandidatePool(self, [])
3381

    
3382
    if self.op.maintain_node_health is not None:
3383
      self.cluster.maintain_node_health = self.op.maintain_node_health
3384

    
3385
    if self.op.prealloc_wipe_disks is not None:
3386
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3387

    
3388
    if self.op.add_uids is not None:
3389
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3390

    
3391
    if self.op.remove_uids is not None:
3392
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3393

    
3394
    if self.op.uid_pool is not None:
3395
      self.cluster.uid_pool = self.op.uid_pool
3396

    
3397
    if self.op.default_iallocator is not None:
3398
      self.cluster.default_iallocator = self.op.default_iallocator
3399

    
3400
    if self.op.reserved_lvs is not None:
3401
      self.cluster.reserved_lvs = self.op.reserved_lvs
3402

    
3403
    def helper_os(aname, mods, desc):
3404
      desc += " OS list"
3405
      lst = getattr(self.cluster, aname)
3406
      for key, val in mods:
3407
        if key == constants.DDM_ADD:
3408
          if val in lst:
3409
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3410
          else:
3411
            lst.append(val)
3412
        elif key == constants.DDM_REMOVE:
3413
          if val in lst:
3414
            lst.remove(val)
3415
          else:
3416
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3417
        else:
3418
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3419
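    # helper_os is applied below to both the hidden and the blacklisted OS
    # lists; "mods" is a list of (DDM_ADD|DDM_REMOVE, os_name) pairs coming
    # from the opcode.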

    
3420
    if self.op.hidden_os:
3421
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3422

    
3423
    if self.op.blacklisted_os:
3424
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3425

    
3426
    if self.op.master_netdev:
3427
      master = self.cfg.GetMasterNode()
3428
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3429
                  self.cluster.master_netdev)
3430
      result = self.rpc.call_node_stop_master(master, False)
3431
      result.Raise("Could not disable the master ip")
3432
      feedback_fn("Changing master_netdev from %s to %s" %
3433
                  (self.cluster.master_netdev, self.op.master_netdev))
3434
      self.cluster.master_netdev = self.op.master_netdev
3435

    
3436
    self.cfg.Update(self.cluster, feedback_fn)
3437

    
3438
    if self.op.master_netdev:
3439
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3440
                  self.op.master_netdev)
3441
      result = self.rpc.call_node_start_master(master, False, False)
3442
      if result.fail_msg:
3443
        self.LogWarning("Could not re-enable the master ip on"
3444
                        " the master, please restart manually: %s",
3445
                        result.fail_msg)
3446

    
3447

    
3448
def _UploadHelper(lu, nodes, fname):
3449
  """Helper for uploading a file and showing warnings.
3450

3451
  """
3452
  if os.path.exists(fname):
3453
    result = lu.rpc.call_upload_file(nodes, fname)
3454
    for to_node, to_result in result.items():
3455
      msg = to_result.fail_msg
3456
      if msg:
3457
        msg = ("Copy of file %s to node %s failed: %s" %
3458
               (fname, to_node, msg))
3459
        lu.proc.LogWarning(msg)
3460

    
3461

    
3462
def _ComputeAncillaryFiles(cluster, redist):
3463
  """Compute files external to Ganeti which need to be consistent.
3464

3465
  @type redist: boolean
3466
  @param redist: Whether to include files which need to be redistributed
3467

3468
  """
3469
  # Compute files for all nodes
3470
  files_all = set([
3471
    constants.SSH_KNOWN_HOSTS_FILE,
3472
    constants.CONFD_HMAC_KEY,
3473
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3474
    ])
3475

    
3476
  if not redist:
3477
    files_all.update(constants.ALL_CERT_FILES)
3478
    files_all.update(ssconf.SimpleStore().GetFileList())
3479

    
3480
  if cluster.modify_etc_hosts:
3481
    files_all.add(constants.ETC_HOSTS)
3482

    
3483
  # Files which must either exist on all nodes or on none
3484
  files_all_opt = set([
3485
    constants.RAPI_USERS_FILE,
3486
    ])
3487

    
3488
  # Files which should only be on master candidates
3489
  files_mc = set()
3490
  if not redist:
3491
    files_mc.add(constants.CLUSTER_CONF_FILE)
3492

    
3493
  # Files which should only be on VM-capable nodes
3494
  files_vm = set(filename
3495
    for hv_name in cluster.enabled_hypervisors
3496
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3497

    
3498
  # Filenames must be unique
3499
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3500
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3501
         "Found file listed in more than one file list"
3502

    
3503
  return (files_all, files_all_opt, files_mc, files_vm)
3504

    
3505

    
3506
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3507
  """Distribute additional files which are part of the cluster configuration.
3508

3509
  ConfigWriter takes care of distributing the config and ssconf files, but
3510
  there are more files which should be distributed to all nodes. This function
3511
  makes sure those are copied.
3512

3513
  @param lu: calling logical unit
3514
  @param additional_nodes: list of nodes not in the config to distribute to
3515
  @type additional_vm: boolean
3516
  @param additional_vm: whether the additional nodes are vm-capable or not
3517

3518
  """
3519
  # Gather target nodes
3520
  cluster = lu.cfg.GetClusterInfo()
3521
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3522

    
3523
  online_nodes = lu.cfg.GetOnlineNodeList()
3524
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3525

    
3526
  if additional_nodes is not None:
3527
    online_nodes.extend(additional_nodes)
3528
    if additional_vm:
3529
      vm_nodes.extend(additional_nodes)
3530

    
3531
  # Never distribute to master node
3532
  for nodelist in [online_nodes, vm_nodes]:
3533
    if master_info.name in nodelist:
3534
      nodelist.remove(master_info.name)
3535

    
3536
  # Gather file lists
3537
  (files_all, files_all_opt, files_mc, files_vm) = \
3538
    _ComputeAncillaryFiles(cluster, True)
3539

    
3540
  # Never re-distribute configuration file from here
3541
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3542
              constants.CLUSTER_CONF_FILE in files_vm)
3543
  assert not files_mc, "Master candidates not handled in this function"
3544

    
3545
  filemap = [
3546
    (online_nodes, files_all),
3547
    (online_nodes, files_all_opt),
3548
    (vm_nodes, files_vm),
3549
    ]
3550
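  # _UploadHelper silently skips files that are missing on the master node,
  # which is what makes it safe to include the "all or nothing" files_all_opt
  # set in this map.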

    
3551
  # Upload the files
3552
  for (node_list, files) in filemap:
3553
    for fname in files:
3554
      _UploadHelper(lu, node_list, fname)
3555

    
3556

    
3557
class LUClusterRedistConf(NoHooksLU):
3558
  """Force the redistribution of cluster configuration.
3559

3560
  This is a very simple LU.
3561

3562
  """
3563
  REQ_BGL = False
3564

    
3565
  def ExpandNames(self):
3566
    self.needed_locks = {
3567
      locking.LEVEL_NODE: locking.ALL_SET,
3568
    }
3569
    self.share_locks[locking.LEVEL_NODE] = 1
3570

    
3571
  def Exec(self, feedback_fn):
3572
    """Redistribute the configuration.
3573

3574
    """
3575
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3576
    _RedistributeAncillaryFiles(self)
3577

    
3578

    
3579
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3580
  """Sleep and poll for an instance's disk to sync.
3581

3582
  """
3583
  if not instance.disks or disks is not None and not disks:
3584
    return True
3585

    
3586
  disks = _ExpandCheckDisks(instance, disks)
3587

    
3588
  if not oneshot:
3589
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3590

    
3591
  node = instance.primary_node
3592

    
3593
  for dev in disks:
3594
    lu.cfg.SetDiskID(dev, node)
3595

    
3596
  # TODO: Convert to utils.Retry
3597

    
3598
  retries = 0
3599
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3600
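  # Each iteration below queries the primary node for the mirror status of
  # all disks, reports per-device progress, and then sleeps for at most 60
  # seconds (driven by the devices' estimated sync time); up to 10
  # consecutive RPC failures are tolerated before giving up.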
  while True:
3601
    max_time = 0
3602
    done = True
3603
    cumul_degraded = False
3604
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3605
    msg = rstats.fail_msg
3606
    if msg:
3607
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3608
      retries += 1
3609
      if retries >= 10:
3610
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3611
                                 " aborting." % node)
3612
      time.sleep(6)
3613
      continue
3614
    rstats = rstats.payload
3615
    retries = 0
3616
    for i, mstat in enumerate(rstats):
3617
      if mstat is None:
3618
        lu.LogWarning("Can't compute data for node %s/%s",
3619
                           node, disks[i].iv_name)
3620
        continue
3621

    
3622
      cumul_degraded = (cumul_degraded or
3623
                        (mstat.is_degraded and mstat.sync_percent is None))
3624
      if mstat.sync_percent is not None:
3625
        done = False
3626
        if mstat.estimated_time is not None:
3627
          rem_time = ("%s remaining (estimated)" %
3628
                      utils.FormatSeconds(mstat.estimated_time))
3629
          max_time = mstat.estimated_time
3630
        else:
3631
          rem_time = "no time estimate"
3632
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3633
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3634

    
3635
    # if we're done but degraded, let's do a few small retries, to
3636
    # make sure we see a stable and not transient situation; therefore
3637
    # we force restart of the loop
3638
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3639
      logging.info("Degraded disks found, %d retries left", degr_retries)
3640
      degr_retries -= 1
3641
      time.sleep(1)
3642
      continue
3643

    
3644
    if done or oneshot:
3645
      break
3646

    
3647
    time.sleep(min(60, max_time))
3648

    
3649
  if done:
3650
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3651
  return not cumul_degraded
3652

    
3653

    
3654
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3655
  """Check that mirrors are not degraded.
3656

3657
  The ldisk parameter, if True, will change the test from the
3658
  is_degraded attribute (which represents overall non-ok status for
3659
  the device(s)) to the ldisk (representing the local storage status).
3660

3661
  """
3662
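  # Hedged usage sketch: callers that are about to fail over or migrate a
  # DRBD instance typically check the target node with ldisk=True, e.g.
  # from within an LU:
  #   _CheckDiskConsistency(self, dev, target_node, False, ldisk=True)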
  lu.cfg.SetDiskID(dev, node)
3663

    
3664
  result = True
3665

    
3666
  if on_primary or dev.AssembleOnSecondary():
3667
    rstats = lu.rpc.call_blockdev_find(node, dev)
3668
    msg = rstats.fail_msg
3669
    if msg:
3670
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3671
      result = False
3672
    elif not rstats.payload:
3673
      lu.LogWarning("Can't find disk on node %s", node)
3674
      result = False
3675
    else:
3676
      if ldisk:
3677
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3678
      else:
3679
        result = result and not rstats.payload.is_degraded
3680

    
3681
  if dev.children:
3682
    for child in dev.children:
3683
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3684

    
3685
  return result
3686

    
3687

    
3688
class LUOobCommand(NoHooksLU):
3689
  """Logical unit for OOB handling.
3690

3691
  """
3692
  REQ_BGL = False
3693
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3694
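  # Commands listed in _SKIP_MASTER are refused when the master node is named
  # explicitly (unless its own OOB helper is run by hand) and are silently
  # skipped for the master when no node list is given; see CheckPrereq.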

    
3695
  def ExpandNames(self):
3696
    """Gather locks we need.
3697

3698
    """
3699
    if self.op.node_names:
3700
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3701
      lock_names = self.op.node_names
3702
    else:
3703
      lock_names = locking.ALL_SET
3704

    
3705
    self.needed_locks = {
3706
      locking.LEVEL_NODE: lock_names,
3707
      }
3708

    
3709
  def CheckPrereq(self):
3710
    """Check prerequisites.
3711

3712
    This checks:
3713
     - the node exists in the configuration
3714
     - OOB is supported
3715

3716
    Any errors are signaled by raising errors.OpPrereqError.
3717

3718
    """
3719
    self.nodes = []
3720
    self.master_node = self.cfg.GetMasterNode()
3721

    
3722
    assert self.op.power_delay >= 0.0
3723

    
3724
    if self.op.node_names:
3725
      if (self.op.command in self._SKIP_MASTER and
3726
          self.master_node in self.op.node_names):
3727
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3728
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3729

    
3730
        if master_oob_handler:
3731
          additional_text = ("run '%s %s %s' if you want to operate on the"
3732
                             " master regardless") % (master_oob_handler,
3733
                                                      self.op.command,
3734
                                                      self.master_node)
3735
        else:
3736
          additional_text = "it does not support out-of-band operations"
3737

    
3738
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3739
                                    " allowed for %s; %s") %
3740
                                   (self.master_node, self.op.command,
3741
                                    additional_text), errors.ECODE_INVAL)
3742
    else:
3743
      self.op.node_names = self.cfg.GetNodeList()
3744
      if self.op.command in self._SKIP_MASTER:
3745
        self.op.node_names.remove(self.master_node)
3746

    
3747
    if self.op.command in self._SKIP_MASTER:
3748
      assert self.master_node not in self.op.node_names
3749

    
3750
    for node_name in self.op.node_names:
3751
      node = self.cfg.GetNodeInfo(node_name)
3752

    
3753
      if node is None:
3754
        raise errors.OpPrereqError("Node %s not found" % node_name,
3755
                                   errors.ECODE_NOENT)
3756
      else:
3757
        self.nodes.append(node)
3758

    
3759
      if (not self.op.ignore_status and
3760
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3761
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3762
                                    " not marked offline") % node_name,
3763
                                   errors.ECODE_STATE)
3764

    
3765
  def Exec(self, feedback_fn):
3766
    """Execute OOB and return result if we expect any.
3767

3768
    """
3769
    master_node = self.master_node
3770
    ret = []
3771

    
3772
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3773
                                              key=lambda node: node.name)):
3774
      node_entry = [(constants.RS_NORMAL, node.name)]
3775
      ret.append(node_entry)
3776

    
3777
      oob_program = _SupportsOob(self.cfg, node)
3778

    
3779
      if not oob_program:
3780
        node_entry.append((constants.RS_UNAVAIL, None))
3781
        continue
3782

    
3783
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3784
                   self.op.command, oob_program, node.name)
3785
      result = self.rpc.call_run_oob(master_node, oob_program,
3786
                                     self.op.command, node.name,
3787
                                     self.op.timeout)
3788

    
3789
      if result.fail_msg:
3790
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3791
                        node.name, result.fail_msg)
3792
        node_entry.append((constants.RS_NODATA, None))
3793
      else:
3794
        try:
3795
          self._CheckPayload(result)
3796
        except errors.OpExecError, err:
3797
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3798
                          node.name, err)
3799
          node_entry.append((constants.RS_NODATA, None))
3800
        else:
3801
          if self.op.command == constants.OOB_HEALTH:
3802
            # For health we should log important events
3803
            for item, status in result.payload:
3804
              if status in [constants.OOB_STATUS_WARNING,
3805
                            constants.OOB_STATUS_CRITICAL]:
3806
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3807
                                item, node.name, status)
3808

    
3809
          if self.op.command == constants.OOB_POWER_ON:
3810
            node.powered = True
3811
          elif self.op.command == constants.OOB_POWER_OFF:
3812
            node.powered = False
3813
          elif self.op.command == constants.OOB_POWER_STATUS:
3814
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3815
            if powered != node.powered:
3816
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3817
                               " match actual power state (%s)"), node.powered,
3818
                              node.name, powered)
3819

    
3820
          # For configuration changing commands we should update the node
3821
          if self.op.command in (constants.OOB_POWER_ON,
3822
                                 constants.OOB_POWER_OFF):
3823
            self.cfg.Update(node, feedback_fn)
3824

    
3825
          node_entry.append((constants.RS_NORMAL, result.payload))
3826

    
3827
          if (self.op.command == constants.OOB_POWER_ON and
3828
              idx < len(self.nodes) - 1):
3829
            time.sleep(self.op.power_delay)
3830

    
3831
    return ret
3832

    
3833
  def _CheckPayload(self, result):
3834
    """Checks if the payload is valid.
3835

3836
    @param result: RPC result
3837
    @raises errors.OpExecError: If payload is not valid
3838

3839
    """
3840
    errs = []
3841
    if self.op.command == constants.OOB_HEALTH:
3842
      if not isinstance(result.payload, list):
3843
        errs.append("command 'health' is expected to return a list but got %s" %
3844
                    type(result.payload))
3845
      else:
3846
        for item, status in result.payload:
3847
          if status not in constants.OOB_STATUSES:
3848
            errs.append("health item '%s' has invalid status '%s'" %
3849
                        (item, status))
3850

    
3851
    if self.op.command == constants.OOB_POWER_STATUS:
3852
      if not isinstance(result.payload, dict):
3853
        errs.append("power-status is expected to return a dict but got %s" %
3854
                    type(result.payload))
3855

    
3856
    if self.op.command in [
3857
        constants.OOB_POWER_ON,
3858
        constants.OOB_POWER_OFF,
3859
        constants.OOB_POWER_CYCLE,
3860
        ]:
3861
      if result.payload is not None:
3862
        errs.append("%s is expected to not return payload but got '%s'" %
3863
                    (self.op.command, result.payload))
3864

    
3865
    if errs:
3866
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3867
                               utils.CommaJoin(errs))
3868

    
3869
class _OsQuery(_QueryBase):
3870
  FIELDS = query.OS_FIELDS
3871

    
3872
  def ExpandNames(self, lu):
3873
    # Lock all nodes in shared mode
3874
    # Temporary removal of locks, should be reverted later
3875
    # TODO: reintroduce locks when they are lighter-weight
3876
    lu.needed_locks = {}
3877
    #self.share_locks[locking.LEVEL_NODE] = 1
3878
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3879

    
3880
    # The following variables interact with _QueryBase._GetNames
3881
    if self.names:
3882
      self.wanted = self.names
3883
    else:
3884
      self.wanted = locking.ALL_SET
3885

    
3886
    self.do_locking = self.use_locking
3887

    
3888
  def DeclareLocks(self, lu, level):
3889
    pass
3890

    
3891
  @staticmethod
3892
  def _DiagnoseByOS(rlist):
3893
    """Remaps a per-node return list into an a per-os per-node dictionary
3894

3895
    @param rlist: a map with node names as keys and OS objects as values
3896

3897
    @rtype: dict
3898
    @return: a dictionary with osnames as keys and as value another
3899
        map, with nodes as keys and tuples of (path, status, diagnose,
3900
        variants, parameters, api_versions) as values, eg::
3901

3902
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3903
                                     (/srv/..., False, "invalid api")],
3904
                           "node2": [(/srv/..., True, "", [], [])]}
3905
          }
3906

3907
    """
3908
    all_os = {}
3909
    # we build here the list of nodes that didn't fail the RPC (at RPC
3910
    # level), so that nodes with a non-responding node daemon don't
3911
    # make all OSes invalid
3912
    good_nodes = [node_name for node_name in rlist
3913
                  if not rlist[node_name].fail_msg]
3914
    for node_name, nr in rlist.items():
3915
      if nr.fail_msg or not nr.payload:
3916
        continue
3917
      for (name, path, status, diagnose, variants,
3918
           params, api_versions) in nr.payload:
3919
        if name not in all_os:
3920
          # build a list of nodes for this os containing empty lists
3921
          # for each node in node_list
3922
          all_os[name] = {}
3923
          for nname in good_nodes:
3924
            all_os[name][nname] = []
3925
        # convert params from [name, help] to (name, help)
3926
        params = [tuple(v) for v in params]
3927
        all_os[name][node_name].append((path, status, diagnose,
3928
                                        variants, params, api_versions))
3929
    return all_os
3930

    
3931
  def _GetQueryData(self, lu):
3932
    """Computes the list of nodes and their attributes.
3933

3934
    """
3935
    # Locking is not used
3936
    assert not (compat.any(lu.glm.is_owned(level)
3937
                           for level in locking.LEVELS
3938
                           if level != locking.LEVEL_CLUSTER) or
3939
                self.do_locking or self.use_locking)
3940

    
3941
    valid_nodes = [node.name
3942
                   for node in lu.cfg.GetAllNodesInfo().values()
3943
                   if not node.offline and node.vm_capable]
3944
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3945
    cluster = lu.cfg.GetClusterInfo()
3946

    
3947
    data = {}
3948

    
3949
    for (os_name, os_data) in pol.items():
3950
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3951
                          hidden=(os_name in cluster.hidden_os),
3952
                          blacklisted=(os_name in cluster.blacklisted_os))
3953

    
3954
      variants = set()
3955
      parameters = set()
3956
      api_versions = set()
3957

    
3958
      for idx, osl in enumerate(os_data.values()):
3959
        info.valid = bool(info.valid and osl and osl[0][1])
3960
        if not info.valid:
3961
          break
3962

    
3963
        (node_variants, node_params, node_api) = osl[0][3:6]
3964
        if idx == 0:
3965
          # First entry
3966
          variants.update(node_variants)
3967
          parameters.update(node_params)
3968
          api_versions.update(node_api)
3969
        else:
3970
          # Filter out inconsistent values
3971
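          # e.g. variants {"squeeze", "wheezy"} & {"wheezy"} -> {"wheezy"}:
          # only values reported identically by every node survive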
          variants.intersection_update(node_variants)
3972
          parameters.intersection_update(node_params)
3973
          api_versions.intersection_update(node_api)
3974

    
3975
      info.variants = list(variants)
3976
      info.parameters = list(parameters)
3977
      info.api_versions = list(api_versions)
3978

    
3979
      data[os_name] = info
3980

    
3981
    # Prepare data in requested order
3982
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3983
            if name in data]
3984

    
3985

    
3986
class LUOsDiagnose(NoHooksLU):
3987
  """Logical unit for OS diagnose/query.
3988

3989
  """
3990
  REQ_BGL = False
3991

    
3992
  @staticmethod
3993
  def _BuildFilter(fields, names):
3994
    """Builds a filter for querying OSes.
3995

3996
    """
3997
    name_filter = qlang.MakeSimpleFilter("name", names)
3998

    
3999
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4000
    # respective field is not requested
4001
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4002
                     for fname in ["hidden", "blacklisted"]
4003
                     if fname not in fields]
4004
    if "valid" not in fields:
4005
      status_filter.append([qlang.OP_TRUE, "valid"])
4006

    
4007
    if status_filter:
4008
      status_filter.insert(0, qlang.OP_AND)
4009
    else:
4010
      status_filter = None
4011

    
4012
    if name_filter and status_filter:
4013
      return [qlang.OP_AND, name_filter, status_filter]
4014
    elif name_filter:
4015
      return name_filter
4016
    else:
4017
      return status_filter
4018
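  # For example (sketch), _BuildFilter(["name"], []) evaluates to
  #   [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
  #            [OP_NOT, [OP_TRUE, "blacklisted"]], [OP_TRUE, "valid"]]
  # i.e. hidden, blacklisted and invalid OSes are excluded by default.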

    
4019
  def CheckArguments(self):
4020
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4021
                       self.op.output_fields, False)
4022

    
4023
  def ExpandNames(self):
4024
    self.oq.ExpandNames(self)
4025

    
4026
  def Exec(self, feedback_fn):
4027
    return self.oq.OldStyleQuery(self)
4028

    
4029

    
4030
class LUNodeRemove(LogicalUnit):
4031
  """Logical unit for removing a node.
4032

4033
  """
4034
  HPATH = "node-remove"
4035
  HTYPE = constants.HTYPE_NODE
4036

    
4037
  def BuildHooksEnv(self):
4038
    """Build hooks env.
4039

4040
    This doesn't run on the target node in the pre phase as a failed
4041
    node would then be impossible to remove.
4042

4043
    """
4044
    return {
4045
      "OP_TARGET": self.op.node_name,
4046
      "NODE_NAME": self.op.node_name,
4047
      }
4048

    
4049
  def BuildHooksNodes(self):
4050
    """Build hooks nodes.
4051

4052
    """
4053
    all_nodes = self.cfg.GetNodeList()
4054
    try:
4055
      all_nodes.remove(self.op.node_name)
4056
    except ValueError:
4057
      logging.warning("Node '%s', which is about to be removed, was not found"
4058
                      " in the list of all nodes", self.op.node_name)
4059
    return (all_nodes, all_nodes)
4060

    
4061
  def CheckPrereq(self):
4062
    """Check prerequisites.
4063

4064
    This checks:
4065
     - the node exists in the configuration
4066
     - it does not have primary or secondary instances
4067
     - it's not the master
4068

4069
    Any errors are signaled by raising errors.OpPrereqError.
4070

4071
    """
4072
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4073
    node = self.cfg.GetNodeInfo(self.op.node_name)
4074
    assert node is not None
4075

    
4076
    instance_list = self.cfg.GetInstanceList()
4077

    
4078
    masternode = self.cfg.GetMasterNode()
4079
    if node.name == masternode:
4080
      raise errors.OpPrereqError("Node is the master node, failover to another"
4081
                                 " node is required", errors.ECODE_INVAL)
4082

    
4083
    for instance_name in instance_list:
4084
      instance = self.cfg.GetInstanceInfo(instance_name)
4085
      if node.name in instance.all_nodes:
4086
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4087
                                   " please remove first" % instance_name,
4088
                                   errors.ECODE_INVAL)
4089
    self.op.node_name = node.name
4090
    self.node = node
4091

    
4092
  def Exec(self, feedback_fn):
4093
    """Removes the node from the cluster.
4094

4095
    """
4096
    node = self.node
4097
    logging.info("Stopping the node daemon and removing configs from node %s",
4098
                 node.name)
4099

    
4100
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4101

    
4102
    # Promote nodes to master candidate as needed
4103
    _AdjustCandidatePool(self, exceptions=[node.name])
4104
    self.context.RemoveNode(node.name)
4105

    
4106
    # Run post hooks on the node before it's removed
4107
    _RunPostHook(self, node.name)
4108

    
4109
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4110
    msg = result.fail_msg
4111
    if msg:
4112
      self.LogWarning("Errors encountered on the remote node while leaving"
4113
                      " the cluster: %s", msg)
4114

    
4115
    # Remove node from our /etc/hosts
4116
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4117
      master_node = self.cfg.GetMasterNode()
4118
      result = self.rpc.call_etc_hosts_modify(master_node,
4119
                                              constants.ETC_HOSTS_REMOVE,
4120
                                              node.name, None)
4121
      result.Raise("Can't update hosts file with new host data")
4122
      _RedistributeAncillaryFiles(self)
4123

    
4124

    
4125
class _NodeQuery(_QueryBase):
4126
  FIELDS = query.NODE_FIELDS
4127

    
4128
  def ExpandNames(self, lu):
4129
    lu.needed_locks = {}
4130
    lu.share_locks[locking.LEVEL_NODE] = 1
4131

    
4132
    if self.names:
4133
      self.wanted = _GetWantedNodes(lu, self.names)
4134
    else:
4135
      self.wanted = locking.ALL_SET
4136

    
4137
    self.do_locking = (self.use_locking and
4138
                       query.NQ_LIVE in self.requested_data)
4139

    
4140
    if self.do_locking:
4141
      # if we don't request only static fields, we need to lock the nodes
4142
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4143

    
4144
  def DeclareLocks(self, lu, level):
4145
    pass
4146

    
4147
  def _GetQueryData(self, lu):
4148
    """Computes the list of nodes and their attributes.
4149

4150
    """
4151
    all_info = lu.cfg.GetAllNodesInfo()
4152

    
4153
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4154

    
4155
    # Gather data as requested
4156
    if query.NQ_LIVE in self.requested_data:
4157
      # filter out non-vm_capable nodes
4158
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4159

    
4160
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4161
                                        lu.cfg.GetHypervisorType())
4162
      live_data = dict((name, nresult.payload)
4163
                       for (name, nresult) in node_data.items()
4164
                       if not nresult.fail_msg and nresult.payload)
4165
    else:
4166
      live_data = None
4167

    
4168
    if query.NQ_INST in self.requested_data:
4169
      node_to_primary = dict([(name, set()) for name in nodenames])
4170
      node_to_secondary = dict([(name, set()) for name in nodenames])
4171

    
4172
      inst_data = lu.cfg.GetAllInstancesInfo()
4173

    
4174
      for inst in inst_data.values():
4175
        if inst.primary_node in node_to_primary:
4176
          node_to_primary[inst.primary_node].add(inst.name)
4177
        for secnode in inst.secondary_nodes:
4178
          if secnode in node_to_secondary:
4179
            node_to_secondary[secnode].add(inst.name)
4180
    else:
4181
      node_to_primary = None
4182
      node_to_secondary = None
4183

    
4184
    if query.NQ_OOB in self.requested_data:
4185
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4186
                         for name, node in all_info.iteritems())
4187
    else:
4188
      oob_support = None
4189

    
4190
    if query.NQ_GROUP in self.requested_data:
4191
      groups = lu.cfg.GetAllNodeGroupsInfo()
4192
    else:
4193
      groups = {}
4194

    
4195
    return query.NodeQueryData([all_info[name] for name in nodenames],
4196
                               live_data, lu.cfg.GetMasterNode(),
4197
                               node_to_primary, node_to_secondary, groups,
4198
                               oob_support, lu.cfg.GetClusterInfo())
4199

    
4200

    
4201
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output

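# Illustrative sketch only: the for/else idiom used above to map a logical
# volume back to the owning instance ('-' when nothing references it). The
# lv_by_node shape (instance object -> node -> list of LV names) mirrors the
# one built in LUNodeQueryvols.Exec; this helper is not referenced anywhere.
def _ExampleLvOwner(lv_by_node, node, lv_name):
  """Return the name of the instance owning lv_name on node, or '-'.

  """
  for inst in lv_by_node:
    if node in lv_by_node[inst] and lv_name in lv_by_node[inst][node]:
      return inst.name
  return "-"

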
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result

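# Illustrative sketch only: the column-remapping idiom used by
# LUNodeQueryStorage.Exec above. Given the field list actually requested from
# the backend and one result row, it picks the caller-requested columns in
# order. Purely an example, not referenced by the LUs.
def _ExampleReorderRow(backend_fields, row, wanted_fields):
  """Reorder a storage result row according to the wanted output fields.

  """
  field_idx = dict((name, idx) for (idx, name) in enumerate(backend_fields))
  return [row[field_idx[field]] for field in wanted_fields]

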
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)

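# Illustrative sketch only: _InstanceQuery._GetQueryData above gates the
# expensive live-data RPC on whether any of a group of flags was requested,
# using plain set intersection. The flag values below are stand-ins for
# query.IQ_LIVE / query.IQ_CONSOLE, invented for the example.
def _ExampleNeedsLiveData(requested_data):
  """Tell whether live instance data must be fetched for this query.

  """
  _example_iq_live = "live"
  _example_iq_console = "console"
  return bool(frozenset(requested_data) &
              frozenset([_example_iq_live, _example_iq_console]))

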
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

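# Illustrative sketch only: the validation idiom used by
# LUNodeModifyStorage.CheckArguments above - reject any requested change whose
# key is not in the per-storage-type whitelist. The whitelist argument stands
# in for constants.MODIFIABLE_STORAGE_FIELDS[storage_type].
def _ExampleCheckModifiable(changes, modifiable):
  """Return the set of keys in changes that may not be modified.

  """
  return set(changes.keys()) - frozenset(modifiable)

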
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())

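# Illustrative sketch only: the getattr/setattr defaulting pattern used by
# LUNodeAdd.CheckPrereq above to fill unspecified *_capable flags, either from
# the existing node object (re-add) or with True (fresh add). The 'op' and
# 'defaults_obj' arguments are plain attribute containers, invented here.
def _ExampleFillNodeFlags(op, flag_names, defaults_obj=None):
  """Fill unset boolean flags on op from defaults_obj (or True).

  """
  for attr in flag_names:
    if getattr(op, attr) is None:
      if defaults_obj is not None:
        setattr(op, attr, getattr(defaults_obj, attr))
      else:
        setattr(op, attr, True)

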
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result

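# Illustrative sketch only: LUNodeSetParams maps the (master_candidate,
# drained, offline) flag triple to a role via _F2R and back via _R2F; the
# inverse table is just the dict inversion shown here on a caller-provided
# copy of the table.
def _ExampleInvertFlagTable(f2r):
  """Return the role -> flags mapping for a flags -> role table.

  """
  return dict((role, flags) for (flags, role) in f2r.items())

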
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values

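# Illustrative sketch only: the nested-dict filtering done by
# LUClusterQuery.Exec above, keeping per-OS hypervisor parameters only for
# hypervisors actually enabled on the cluster. Not referenced by any LU.
def _ExampleFilterOsHvp(os_hvp, enabled_hypervisors):
  """Return os_hvp restricted to the enabled hypervisors.

  """
  result = {}
  for os_name, hv_dict in os_hvp.items():
    result[os_name] = dict((hv_name, hv_params)
                           for (hv_name, hv_params) in hv_dict.items()
                           if hv_name in enabled_hypervisors)
  return result

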
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info

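# Illustrative sketch only: the two-pass ordering used by
# _AssembleInstanceDisks above - every node first sees the disk in secondary
# mode, and only afterwards is the primary node asked to take over, narrowing
# the DRBD primary-before-handshake race. The assemble_fn callback stands in
# for the call_blockdev_assemble RPC.
def _ExampleTwoPassAssemble(node_disks, primary_node, assemble_fn):
  """Call assemble_fn(node, disk, as_primary) in the two-pass order.

  """
  for (node, disk) in node_disks:
    assemble_fn(node, disk, False)
  for (node, disk) in node_disks:
    if node == primary_node:
      assemble_fn(node, disk, True)

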
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  otherwise they cause the function to return False.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result

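# Illustrative sketch only: the subset validation used by _ExpandCheckDisks
# above - acting on a caller-provided disk list is only allowed when every
# entry belongs to the instance.
def _ExampleCheckSubset(selected, owned):
  """Return True if all selected items belong to the owned collection.

  """
  return set(selected).issubset(set(owned))

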
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)

class LUInstanceStartup(LogicalUnit):
5599
  """Starts an instance.
5600

5601
  """
5602
  HPATH = "instance-start"
5603
  HTYPE = constants.HTYPE_INSTANCE
5604
  REQ_BGL = False
5605

    
5606
  def CheckArguments(self):
5607
    # extra beparams
5608
    if self.op.beparams:
5609
      # fill the beparams dict
5610
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5611

    
5612
  def ExpandNames(self):
5613
    self._ExpandAndLockInstance()
5614

    
5615
  def BuildHooksEnv(self):
5616
    """Build hooks env.
5617

5618
    This runs on master, primary and secondary nodes of the instance.
5619

5620
    """
5621
    env = {
5622
      "FORCE": self.op.force,
5623
      }
5624

    
5625
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5626

    
5627
    return env
5628

    
5629
  def BuildHooksNodes(self):
5630
    """Build hooks nodes.
5631

5632
    """
5633
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5634
    return (nl, nl)
5635

    
5636
  def CheckPrereq(self):
5637
    """Check prerequisites.
5638

5639
    This checks that the instance is in the cluster.
5640

5641
    """
5642
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5643
    assert self.instance is not None, \
5644
      "Cannot retrieve locked instance %s" % self.op.instance_name
5645

    
5646
    # extra hvparams
5647
    if self.op.hvparams:
5648
      # check hypervisor parameter syntax (locally)
5649
      cluster = self.cfg.GetClusterInfo()
5650
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5651
      filled_hvp = cluster.FillHV(instance)
5652
      filled_hvp.update(self.op.hvparams)
5653
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5654
      hv_type.CheckParameterSyntax(filled_hvp)
5655
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
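      # Illustrative only (hypothetical parameter names): with a cluster-level
      # value such as {"kernel_path": "/boot/vmlinuz"} and an override of
      # {"kernel_args": "single"} in self.op.hvparams, filled_hvp is the
      # merged dict, syntax-checked locally and then on all instance nodes.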

    
5657
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5658

    
5659
    if self.primary_offline and self.op.ignore_offline_nodes:
5660
      self.proc.LogWarning("Ignoring offline primary node")
5661

    
5662
      if self.op.hvparams or self.op.beparams:
5663
        self.proc.LogWarning("Overridden parameters are ignored")
5664
    else:
5665
      _CheckNodeOnline(self, instance.primary_node)
5666

    
5667
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5668

    
5669
      # check bridges existence
5670
      _CheckInstanceBridgesExist(self, instance)
5671

    
5672
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5673
                                                instance.name,
5674
                                                instance.hypervisor)
5675
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5676
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5677
      if not remote_info.payload: # not running already
5678
        _CheckNodeFreeMemory(self, instance.primary_node,
5679
                             "starting instance %s" % instance.name,
5680
                             bep[constants.BE_MEMORY], instance.hypervisor)
5681

    
5682
  def Exec(self, feedback_fn):
5683
    """Start the instance.
5684

5685
    """
5686
    instance = self.instance
5687
    force = self.op.force
5688

    
5689
    if not self.op.no_remember:
5690
      self.cfg.MarkInstanceUp(instance.name)
5691

    
5692
    if self.primary_offline:
5693
      assert self.op.ignore_offline_nodes
5694
      self.proc.LogInfo("Primary node offline, marked instance as started")
5695
    else:
5696
      node_current = instance.primary_node
5697

    
5698
      _StartInstanceDisks(self, instance, force)
5699

    
5700
      result = self.rpc.call_instance_start(node_current, instance,
5701
                                            self.op.hvparams, self.op.beparams)
5702
      msg = result.fail_msg
5703
      if msg:
5704
        _ShutdownInstanceDisks(self, instance)
5705
        raise errors.OpExecError("Could not start instance: %s" % msg)
5706

    
5707

    
5708
class LUInstanceReboot(LogicalUnit):
5709
  """Reboot an instance.
5710

5711
  """
5712
  HPATH = "instance-reboot"
5713
  HTYPE = constants.HTYPE_INSTANCE
5714
  REQ_BGL = False
5715

    
5716
  def ExpandNames(self):
5717
    self._ExpandAndLockInstance()
5718

    
5719
  def BuildHooksEnv(self):
5720
    """Build hooks env.
5721

5722
    This runs on master, primary and secondary nodes of the instance.
5723

5724
    """
5725
    env = {
5726
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5727
      "REBOOT_TYPE": self.op.reboot_type,
5728
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5729
      }
5730

    
5731
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5732

    
5733
    return env
5734

    
5735
  def BuildHooksNodes(self):
5736
    """Build hooks nodes.
5737

5738
    """
5739
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5740
    return (nl, nl)
5741

    
5742
  def CheckPrereq(self):
5743
    """Check prerequisites.
5744

5745
    This checks that the instance is in the cluster.
5746

5747
    """
5748
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5749
    assert self.instance is not None, \
5750
      "Cannot retrieve locked instance %s" % self.op.instance_name
5751

    
5752
    _CheckNodeOnline(self, instance.primary_node)
5753

    
5754
    # check bridges existence
5755
    _CheckInstanceBridgesExist(self, instance)
5756

    
5757
  def Exec(self, feedback_fn):
5758
    """Reboot the instance.
5759

5760
    """
5761
    instance = self.instance
5762
    ignore_secondaries = self.op.ignore_secondaries
5763
    reboot_type = self.op.reboot_type
5764

    
5765
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5766
                                              instance.name,
5767
                                              instance.hypervisor)
5768
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5769
    instance_running = bool(remote_info.payload)
5770

    
5771
    node_current = instance.primary_node
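    # Soft and hard reboots of a running instance are delegated to the
    # hypervisor below; a full reboot (or a reboot of a stopped instance) is
    # emulated as shutdown (if needed), disk restart and instance start.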

    
5773
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5774
                                            constants.INSTANCE_REBOOT_HARD]:
5775
      for disk in instance.disks:
5776
        self.cfg.SetDiskID(disk, node_current)
5777
      result = self.rpc.call_instance_reboot(node_current, instance,
5778
                                             reboot_type,
5779
                                             self.op.shutdown_timeout)
5780
      result.Raise("Could not reboot instance")
5781
    else:
5782
      if instance_running:
5783
        result = self.rpc.call_instance_shutdown(node_current, instance,
5784
                                                 self.op.shutdown_timeout)
5785
        result.Raise("Could not shutdown instance for full reboot")
5786
        _ShutdownInstanceDisks(self, instance)
5787
      else:
5788
        self.LogInfo("Instance %s was already stopped, starting now",
5789
                     instance.name)
5790
      _StartInstanceDisks(self, instance, ignore_secondaries)
5791
      result = self.rpc.call_instance_start(node_current, instance, None, None)
5792
      msg = result.fail_msg
5793
      if msg:
5794
        _ShutdownInstanceDisks(self, instance)
5795
        raise errors.OpExecError("Could not start instance for"
5796
                                 " full reboot: %s" % msg)
5797

    
5798
    self.cfg.MarkInstanceUp(instance.name)
5799

    
5800

    
5801
class LUInstanceShutdown(LogicalUnit):
5802
  """Shutdown an instance.
5803

5804
  """
5805
  HPATH = "instance-stop"
5806
  HTYPE = constants.HTYPE_INSTANCE
5807
  REQ_BGL = False
5808

    
5809
  def ExpandNames(self):
5810
    self._ExpandAndLockInstance()
5811

    
5812
  def BuildHooksEnv(self):
5813
    """Build hooks env.
5814

5815
    This runs on master, primary and secondary nodes of the instance.
5816

5817
    """
5818
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5819
    env["TIMEOUT"] = self.op.timeout
5820
    return env
5821

    
5822
  def BuildHooksNodes(self):
5823
    """Build hooks nodes.
5824

5825
    """
5826
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5827
    return (nl, nl)
5828

    
5829
  def CheckPrereq(self):
5830
    """Check prerequisites.
5831

5832
    This checks that the instance is in the cluster.
5833

5834
    """
5835
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5836
    assert self.instance is not None, \
5837
      "Cannot retrieve locked instance %s" % self.op.instance_name
5838

    
5839
    self.primary_offline = \
5840
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5841

    
5842
    if self.primary_offline and self.op.ignore_offline_nodes:
5843
      self.proc.LogWarning("Ignoring offline primary node")
5844
    else:
5845
      _CheckNodeOnline(self, self.instance.primary_node)
5846

    
5847
  def Exec(self, feedback_fn):
5848
    """Shutdown the instance.
5849

5850
    """
5851
    instance = self.instance
5852
    node_current = instance.primary_node
5853
    timeout = self.op.timeout
5854

    
5855
    if not self.op.no_remember:
5856
      self.cfg.MarkInstanceDown(instance.name)
5857

    
5858
    if self.primary_offline:
5859
      assert self.op.ignore_offline_nodes
5860
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5861
    else:
5862
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5863
      msg = result.fail_msg
5864
      if msg:
5865
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5866

    
5867
      _ShutdownInstanceDisks(self, instance)
5868

    
5869

    
5870
class LUInstanceReinstall(LogicalUnit):
5871
  """Reinstall an instance.
5872

5873
  """
5874
  HPATH = "instance-reinstall"
5875
  HTYPE = constants.HTYPE_INSTANCE
5876
  REQ_BGL = False
5877

    
5878
  def ExpandNames(self):
5879
    self._ExpandAndLockInstance()
5880

    
5881
  def BuildHooksEnv(self):
5882
    """Build hooks env.
5883

5884
    This runs on master, primary and secondary nodes of the instance.
5885

5886
    """
5887
    return _BuildInstanceHookEnvByObject(self, self.instance)
5888

    
5889
  def BuildHooksNodes(self):
5890
    """Build hooks nodes.
5891

5892
    """
5893
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5894
    return (nl, nl)
5895

    
5896
  def CheckPrereq(self):
5897
    """Check prerequisites.
5898

5899
    This checks that the instance is in the cluster and is not running.
5900

5901
    """
5902
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5903
    assert instance is not None, \
5904
      "Cannot retrieve locked instance %s" % self.op.instance_name
5905
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5906
                     " offline, cannot reinstall")
5907
    for node in instance.secondary_nodes:
5908
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5909
                       " cannot reinstall")
5910

    
5911
    if instance.disk_template == constants.DT_DISKLESS:
5912
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5913
                                 self.op.instance_name,
5914
                                 errors.ECODE_INVAL)
5915
    _CheckInstanceDown(self, instance, "cannot reinstall")
5916

    
5917
    if self.op.os_type is not None:
5918
      # OS verification
5919
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5920
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5921
      instance_os = self.op.os_type
5922
    else:
5923
      instance_os = instance.os
5924

    
5925
    nodelist = list(instance.all_nodes)
5926

    
5927
    if self.op.osparams:
5928
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5929
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5930
      self.os_inst = i_osdict # the new dict (without defaults)
5931
    else:
5932
      self.os_inst = None
5933

    
5934
    self.instance = instance
5935

    
5936
  def Exec(self, feedback_fn):
5937
    """Reinstall the instance.
5938

5939
    """
5940
    inst = self.instance
5941

    
5942
    if self.op.os_type is not None:
5943
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5944
      inst.os = self.op.os_type
5945
      # Write to configuration
5946
      self.cfg.Update(inst, feedback_fn)
5947

    
5948
    _StartInstanceDisks(self, inst, None)
5949
    try:
5950
      feedback_fn("Running the instance OS create scripts...")
5951
      # FIXME: pass debug option from opcode to backend
5952
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5953
                                             self.op.debug_level,
5954
                                             osparams=self.os_inst)
5955
      result.Raise("Could not install OS for instance %s on node %s" %
5956
                   (inst.name, inst.primary_node))
5957
    finally:
5958
      _ShutdownInstanceDisks(self, inst)
5959

    
5960

    
5961
class LUInstanceRecreateDisks(LogicalUnit):
5962
  """Recreate an instance's missing disks.
5963

5964
  """
5965
  HPATH = "instance-recreate-disks"
5966
  HTYPE = constants.HTYPE_INSTANCE
5967
  REQ_BGL = False
5968

    
5969
  def CheckArguments(self):
5970
    # normalise the disk list
5971
    self.op.disks = sorted(frozenset(self.op.disks))
5972

    
5973
  def ExpandNames(self):
5974
    self._ExpandAndLockInstance()
5975
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5976
    if self.op.nodes:
5977
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5978
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5979
    else:
5980
      self.needed_locks[locking.LEVEL_NODE] = []
5981

    
5982
  def DeclareLocks(self, level):
5983
    if level == locking.LEVEL_NODE:
5984
      # if we replace the nodes, we only need to lock the old primary,
5985
      # otherwise we need to lock all nodes for disk re-creation
5986
      primary_only = bool(self.op.nodes)
5987
      self._LockInstancesNodes(primary_only=primary_only)
5988

    
5989
  def BuildHooksEnv(self):
5990
    """Build hooks env.
5991

5992
    This runs on master, primary and secondary nodes of the instance.
5993

5994
    """
5995
    return _BuildInstanceHookEnvByObject(self, self.instance)
5996

    
5997
  def BuildHooksNodes(self):
5998
    """Build hooks nodes.
5999

6000
    """
6001
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6002
    return (nl, nl)
6003

    
6004
  def CheckPrereq(self):
6005
    """Check prerequisites.
6006

6007
    This checks that the instance is in the cluster and is not running.
6008

6009
    """
6010
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6011
    assert instance is not None, \
6012
      "Cannot retrieve locked instance %s" % self.op.instance_name
6013
    if self.op.nodes:
6014
      if len(self.op.nodes) != len(instance.all_nodes):
6015
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6016
                                   " %d replacement nodes were specified" %
6017
                                   (instance.name, len(instance.all_nodes),
6018
                                    len(self.op.nodes)),
6019
                                   errors.ECODE_INVAL)
6020
      assert instance.disk_template != constants.DT_DRBD8 or \
6021
          len(self.op.nodes) == 2
6022
      assert instance.disk_template != constants.DT_PLAIN or \
6023
          len(self.op.nodes) == 1
6024
      primary_node = self.op.nodes[0]
6025
    else:
6026
      primary_node = instance.primary_node
6027
    _CheckNodeOnline(self, primary_node)
6028

    
6029
    if instance.disk_template == constants.DT_DISKLESS:
6030
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6031
                                 self.op.instance_name, errors.ECODE_INVAL)
6032
    # if we replace nodes *and* the old primary is offline, we don't
6033
    # check
6034
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6035
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6036
    if not (self.op.nodes and old_pnode.offline):
6037
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6038

    
6039
    if not self.op.disks:
6040
      self.op.disks = range(len(instance.disks))
6041
    else:
6042
      for idx in self.op.disks:
6043
        if idx >= len(instance.disks):
6044
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6045
                                     errors.ECODE_INVAL)
6046
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6047
      raise errors.OpPrereqError("Can't recreate disks partially and"
6048
                                 " change the nodes at the same time",
6049
                                 errors.ECODE_INVAL)
6050
    self.instance = instance
6051

    
6052
  def Exec(self, feedback_fn):
6053
    """Recreate the disks.
6054

6055
    """
6056
    # change primary node, if needed
6057
    if self.op.nodes:
6058
      self.instance.primary_node = self.op.nodes[0]
6059
      self.LogWarning("Changing the instance's nodes, you will have to"
6060
                      " remove any disks left on the older nodes manually")
6061

    
6062
    to_skip = []
6063
    for idx, disk in enumerate(self.instance.disks):
6064
      if idx not in self.op.disks: # disk idx has not been passed in
6065
        to_skip.append(idx)
6066
        continue
6067
      # update secondaries for disks, if needed
6068
      if self.op.nodes:
6069
        if disk.dev_type == constants.LD_DRBD8:
6070
          # need to update the nodes
6071
          assert len(self.op.nodes) == 2
6072
          logical_id = list(disk.logical_id)
6073
          logical_id[0] = self.op.nodes[0]
6074
          logical_id[1] = self.op.nodes[1]
6075
          disk.logical_id = tuple(logical_id)
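          # Note: only the first two elements of the DRBD8 logical_id (the
          # two node names) are rewritten here; the remaining elements are
          # kept as they are.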

    
6077
    if self.op.nodes:
6078
      self.cfg.Update(self.instance, feedback_fn)
6079

    
6080
    _CreateDisks(self, self.instance, to_skip=to_skip)
6081

    
6082

    
6083
class LUInstanceRename(LogicalUnit):
6084
  """Rename an instance.
6085

6086
  """
6087
  HPATH = "instance-rename"
6088
  HTYPE = constants.HTYPE_INSTANCE
6089

    
6090
  def CheckArguments(self):
6091
    """Check arguments.
6092

6093
    """
6094
    if self.op.ip_check and not self.op.name_check:
6095
      # TODO: make the ip check more flexible and not depend on the name check
6096
      raise errors.OpPrereqError("IP address check requires a name check",
6097
                                 errors.ECODE_INVAL)
6098

    
6099
  def BuildHooksEnv(self):
6100
    """Build hooks env.
6101

6102
    This runs on master, primary and secondary nodes of the instance.
6103

6104
    """
6105
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6106
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6107
    return env
6108

    
6109
  def BuildHooksNodes(self):
6110
    """Build hooks nodes.
6111

6112
    """
6113
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6114
    return (nl, nl)
6115

    
6116
  def CheckPrereq(self):
6117
    """Check prerequisites.
6118

6119
    This checks that the instance is in the cluster and is not running.
6120

6121
    """
6122
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6123
                                                self.op.instance_name)
6124
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6125
    assert instance is not None
6126
    _CheckNodeOnline(self, instance.primary_node)
6127
    _CheckInstanceDown(self, instance, "cannot rename")
6128
    self.instance = instance
6129

    
6130
    new_name = self.op.new_name
6131
    if self.op.name_check:
6132
      hostname = netutils.GetHostname(name=new_name)
6133
      if hostname != new_name:
6134
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6135
                     hostname.name)
6136
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6137
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6138
                                    " same as given hostname '%s'") %
6139
                                    (hostname.name, self.op.new_name),
6140
                                    errors.ECODE_INVAL)
6141
      new_name = self.op.new_name = hostname.name
6142
      if (self.op.ip_check and
6143
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6144
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6145
                                   (hostname.ip, new_name),
6146
                                   errors.ECODE_NOTUNIQUE)
6147

    
6148
    instance_list = self.cfg.GetInstanceList()
6149
    if new_name in instance_list and new_name != instance.name:
6150
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6151
                                 new_name, errors.ECODE_EXISTS)
6152

    
6153
  def Exec(self, feedback_fn):
6154
    """Rename the instance.
6155

6156
    """
6157
    inst = self.instance
6158
    old_name = inst.name
6159

    
6160
    rename_file_storage = False
6161
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
6162
        self.op.new_name != inst.name):
6163
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6164
      rename_file_storage = True
6165

    
6166
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6167
    # Change the instance lock. This is definitely safe while we hold the BGL.
6168
    # Otherwise the new lock would have to be added in acquired mode.
6169
    assert self.REQ_BGL
6170
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6171
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6172

    
6173
    # re-read the instance from the configuration after rename
6174
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6175

    
6176
    if rename_file_storage:
6177
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6178
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6179
                                                     old_file_storage_dir,
6180
                                                     new_file_storage_dir)
6181
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6182
                   " (but the instance has been renamed in Ganeti)" %
6183
                   (inst.primary_node, old_file_storage_dir,
6184
                    new_file_storage_dir))
6185

    
6186
    _StartInstanceDisks(self, inst, None)
6187
    try:
6188
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6189
                                                 old_name, self.op.debug_level)
6190
      msg = result.fail_msg
6191
      if msg:
6192
        msg = ("Could not run OS rename script for instance %s on node %s"
6193
               " (but the instance has been renamed in Ganeti): %s" %
6194
               (inst.name, inst.primary_node, msg))
6195
        self.proc.LogWarning(msg)
6196
    finally:
6197
      _ShutdownInstanceDisks(self, inst)
6198

    
6199
    return inst.name
6200

    
6201

    
6202
class LUInstanceRemove(LogicalUnit):
6203
  """Remove an instance.
6204

6205
  """
6206
  HPATH = "instance-remove"
6207
  HTYPE = constants.HTYPE_INSTANCE
6208
  REQ_BGL = False
6209

    
6210
  def ExpandNames(self):
6211
    self._ExpandAndLockInstance()
6212
    self.needed_locks[locking.LEVEL_NODE] = []
6213
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6214

    
6215
  def DeclareLocks(self, level):
6216
    if level == locking.LEVEL_NODE:
6217
      self._LockInstancesNodes()
6218

    
6219
  def BuildHooksEnv(self):
6220
    """Build hooks env.
6221

6222
    This runs on master, primary and secondary nodes of the instance.
6223

6224
    """
6225
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6226
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6227
    return env
6228

    
6229
  def BuildHooksNodes(self):
6230
    """Build hooks nodes.
6231

6232
    """
6233
    nl = [self.cfg.GetMasterNode()]
6234
    nl_post = list(self.instance.all_nodes) + nl
6235
    return (nl, nl_post)
6236

    
6237
  def CheckPrereq(self):
6238
    """Check prerequisites.
6239

6240
    This checks that the instance is in the cluster.
6241

6242
    """
6243
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6244
    assert self.instance is not None, \
6245
      "Cannot retrieve locked instance %s" % self.op.instance_name
6246

    
6247
  def Exec(self, feedback_fn):
6248
    """Remove the instance.
6249

6250
    """
6251
    instance = self.instance
6252
    logging.info("Shutting down instance %s on node %s",
6253
                 instance.name, instance.primary_node)
6254

    
6255
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6256
                                             self.op.shutdown_timeout)
6257
    msg = result.fail_msg
6258
    if msg:
6259
      if self.op.ignore_failures:
6260
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6261
      else:
6262
        raise errors.OpExecError("Could not shutdown instance %s on"
6263
                                 " node %s: %s" %
6264
                                 (instance.name, instance.primary_node, msg))
6265

    
6266
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6267

    
6268

    
6269
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6270
  """Utility function to remove an instance.
6271

6272
  """
6273
  logging.info("Removing block devices for instance %s", instance.name)
6274

    
6275
  if not _RemoveDisks(lu, instance):
6276
    if not ignore_failures:
6277
      raise errors.OpExecError("Can't remove instance's disks")
6278
    feedback_fn("Warning: can't remove instance's disks")
6279

    
6280
  logging.info("Removing instance %s out of cluster config", instance.name)
6281

    
6282
  lu.cfg.RemoveInstance(instance.name)
6283

    
6284
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6285
    "Instance lock removal conflict"
6286

    
6287
  # Remove lock for the instance
6288
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6289

    
6290

    
6291
class LUInstanceQuery(NoHooksLU):
6292
  """Logical unit for querying instances.
6293

6294
  """
6295
  # pylint: disable-msg=W0142
6296
  REQ_BGL = False
6297

    
6298
  def CheckArguments(self):
6299
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6300
                             self.op.output_fields, self.op.use_locking)
6301

    
6302
  def ExpandNames(self):
6303
    self.iq.ExpandNames(self)
6304

    
6305
  def DeclareLocks(self, level):
6306
    self.iq.DeclareLocks(self, level)
6307

    
6308
  def Exec(self, feedback_fn):
6309
    return self.iq.OldStyleQuery(self)
6310

    
6311

    
6312
class LUInstanceFailover(LogicalUnit):
6313
  """Failover an instance.
6314

6315
  """
6316
  HPATH = "instance-failover"
6317
  HTYPE = constants.HTYPE_INSTANCE
6318
  REQ_BGL = False
6319

    
6320
  def CheckArguments(self):
6321
    """Check the arguments.
6322

6323
    """
6324
    self.iallocator = getattr(self.op, "iallocator", None)
6325
    self.target_node = getattr(self.op, "target_node", None)
6326

    
6327
  def ExpandNames(self):
6328
    self._ExpandAndLockInstance()
6329

    
6330
    if self.op.target_node is not None:
6331
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6332

    
6333
    self.needed_locks[locking.LEVEL_NODE] = []
6334
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6335

    
6336
    ignore_consistency = self.op.ignore_consistency
6337
    shutdown_timeout = self.op.shutdown_timeout
6338
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6339
                                       cleanup=False,
6340
                                       failover=True,
6341
                                       ignore_consistency=ignore_consistency,
6342
                                       shutdown_timeout=shutdown_timeout)
6343
    self.tasklets = [self._migrater]
6344

    
6345
  def DeclareLocks(self, level):
6346
    if level == locking.LEVEL_NODE:
6347
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6348
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6349
        if self.op.target_node is None:
6350
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6351
        else:
6352
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6353
                                                   self.op.target_node]
6354
        del self.recalculate_locks[locking.LEVEL_NODE]
6355
      else:
6356
        self._LockInstancesNodes()
6357

    
6358
  def BuildHooksEnv(self):
6359
    """Build hooks env.
6360

6361
    This runs on master, primary and secondary nodes of the instance.
6362

6363
    """
6364
    instance = self._migrater.instance
6365
    source_node = instance.primary_node
6366
    target_node = self.op.target_node
6367
    env = {
6368
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6369
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6370
      "OLD_PRIMARY": source_node,
6371
      "NEW_PRIMARY": target_node,
6372
      }
6373

    
6374
    if instance.disk_template in constants.DTS_INT_MIRROR:
6375
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6376
      env["NEW_SECONDARY"] = source_node
6377
    else:
6378
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6379

    
6380
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6381

    
6382
    return env
6383

    
6384
  def BuildHooksNodes(self):
6385
    """Build hooks nodes.
6386

6387
    """
6388
    instance = self._migrater.instance
6389
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6390
    return (nl, nl + [instance.primary_node])
6391

    
6392

    
6393
class LUInstanceMigrate(LogicalUnit):
6394
  """Migrate an instance.
6395

6396
  This is a migration without shutting down the instance, as opposed to
  a failover, which is done with a shutdown.
6398

6399
  """
6400
  HPATH = "instance-migrate"
6401
  HTYPE = constants.HTYPE_INSTANCE
6402
  REQ_BGL = False
6403

    
6404
  def ExpandNames(self):
6405
    self._ExpandAndLockInstance()
6406

    
6407
    if self.op.target_node is not None:
6408
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6409

    
6410
    self.needed_locks[locking.LEVEL_NODE] = []
6411
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6412

    
6413
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6414
                                       cleanup=self.op.cleanup,
6415
                                       failover=False,
6416
                                       fallback=self.op.allow_failover)
6417
    self.tasklets = [self._migrater]
6418

    
6419
  def DeclareLocks(self, level):
6420
    if level == locking.LEVEL_NODE:
6421
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6422
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6423
        if self.op.target_node is None:
6424
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6425
        else:
6426
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6427
                                                   self.op.target_node]
6428
        del self.recalculate_locks[locking.LEVEL_NODE]
6429
      else:
6430
        self._LockInstancesNodes()
6431

    
6432
  def BuildHooksEnv(self):
6433
    """Build hooks env.
6434

6435
    This runs on master, primary and secondary nodes of the instance.
6436

6437
    """
6438
    instance = self._migrater.instance
6439
    source_node = instance.primary_node
6440
    target_node = self.op.target_node
6441
    env = _BuildInstanceHookEnvByObject(self, instance)
6442
    env.update({
6443
      "MIGRATE_LIVE": self._migrater.live,
6444
      "MIGRATE_CLEANUP": self.op.cleanup,
6445
      "OLD_PRIMARY": source_node,
6446
      "NEW_PRIMARY": target_node,
6447
      })
6448

    
6449
    if instance.disk_template in constants.DTS_INT_MIRROR:
6450
      env["OLD_SECONDARY"] = target_node
6451
      env["NEW_SECONDARY"] = source_node
6452
    else:
6453
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6454

    
6455
    return env
6456

    
6457
  def BuildHooksNodes(self):
6458
    """Build hooks nodes.
6459

6460
    """
6461
    instance = self._migrater.instance
6462
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6463
    return (nl, nl + [instance.primary_node])
6464

    
6465

    
6466
class LUInstanceMove(LogicalUnit):
6467
  """Move an instance by data-copying.
6468

6469
  """
6470
  HPATH = "instance-move"
6471
  HTYPE = constants.HTYPE_INSTANCE
6472
  REQ_BGL = False
6473

    
6474
  def ExpandNames(self):
6475
    self._ExpandAndLockInstance()
6476
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6477
    self.op.target_node = target_node
6478
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6479
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6480

    
6481
  def DeclareLocks(self, level):
6482
    if level == locking.LEVEL_NODE:
6483
      self._LockInstancesNodes(primary_only=True)
6484

    
6485
  def BuildHooksEnv(self):
6486
    """Build hooks env.
6487

6488
    This runs on master, primary and secondary nodes of the instance.
6489

6490
    """
6491
    env = {
6492
      "TARGET_NODE": self.op.target_node,
6493
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6494
      }
6495
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6496
    return env
6497

    
6498
  def BuildHooksNodes(self):
6499
    """Build hooks nodes.
6500

6501
    """
6502
    nl = [
6503
      self.cfg.GetMasterNode(),
6504
      self.instance.primary_node,
6505
      self.op.target_node,
6506
      ]
6507
    return (nl, nl)
6508

    
6509
  def CheckPrereq(self):
6510
    """Check prerequisites.
6511

6512
    This checks that the instance is in the cluster.
6513

6514
    """
6515
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6516
    assert self.instance is not None, \
6517
      "Cannot retrieve locked instance %s" % self.op.instance_name
6518

    
6519
    node = self.cfg.GetNodeInfo(self.op.target_node)
6520
    assert node is not None, \
6521
      "Cannot retrieve locked node %s" % self.op.target_node
6522

    
6523
    self.target_node = target_node = node.name
6524

    
6525
    if target_node == instance.primary_node:
6526
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6527
                                 (instance.name, target_node),
6528
                                 errors.ECODE_STATE)
6529

    
6530
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6531

    
6532
    for idx, dsk in enumerate(instance.disks):
6533
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6534
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6535
                                   " cannot copy" % idx, errors.ECODE_STATE)
6536

    
6537
    _CheckNodeOnline(self, target_node)
6538
    _CheckNodeNotDrained(self, target_node)
6539
    _CheckNodeVmCapable(self, target_node)
6540

    
6541
    if instance.admin_up:
6542
      # check memory requirements on the target node
6543
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6544
                           instance.name, bep[constants.BE_MEMORY],
6545
                           instance.hypervisor)
6546
    else:
6547
      self.LogInfo("Not checking memory on the secondary node as"
6548
                   " instance will not be started")
6549

    
6550
    # check bridge existence
6551
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6552

    
6553
  def Exec(self, feedback_fn):
6554
    """Move an instance.
6555

6556
    The move is done by shutting it down on its present node, copying
6557
    the data over (slow) and starting it on the new node.
6558

6559
    """
6560
    instance = self.instance
6561

    
6562
    source_node = instance.primary_node
6563
    target_node = self.target_node
6564

    
6565
    self.LogInfo("Shutting down instance %s on source node %s",
6566
                 instance.name, source_node)
6567

    
6568
    result = self.rpc.call_instance_shutdown(source_node, instance,
6569
                                             self.op.shutdown_timeout)
6570
    msg = result.fail_msg
6571
    if msg:
6572
      if self.op.ignore_consistency:
6573
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6574
                             " Proceeding anyway. Please make sure node"
6575
                             " %s is down. Error details: %s",
6576
                             instance.name, source_node, source_node, msg)
6577
      else:
6578
        raise errors.OpExecError("Could not shutdown instance %s on"
6579
                                 " node %s: %s" %
6580
                                 (instance.name, source_node, msg))
6581

    
6582
    # create the target disks
6583
    try:
6584
      _CreateDisks(self, instance, target_node=target_node)
6585
    except errors.OpExecError:
6586
      self.LogWarning("Device creation failed, reverting...")
6587
      try:
6588
        _RemoveDisks(self, instance, target_node=target_node)
6589
      finally:
6590
        self.cfg.ReleaseDRBDMinors(instance.name)
6591
        raise
6592

    
6593
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6594

    
6595
    errs = []
6596
    # activate, get path, copy the data over
6597
    for idx, disk in enumerate(instance.disks):
6598
      self.LogInfo("Copying data for disk %d", idx)
6599
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6600
                                               instance.name, True, idx)
6601
      if result.fail_msg:
6602
        self.LogWarning("Can't assemble newly created disk %d: %s",
6603
                        idx, result.fail_msg)
6604
        errs.append(result.fail_msg)
6605
        break
6606
      dev_path = result.payload
6607
      result = self.rpc.call_blockdev_export(source_node, disk,
6608
                                             target_node, dev_path,
6609
                                             cluster_name)
6610
      if result.fail_msg:
6611
        self.LogWarning("Can't copy data over for disk %d: %s",
6612
                        idx, result.fail_msg)
6613
        errs.append(result.fail_msg)
6614
        break
6615

    
6616
    if errs:
6617
      self.LogWarning("Some disks failed to copy, aborting")
6618
      try:
6619
        _RemoveDisks(self, instance, target_node=target_node)
6620
      finally:
6621
        self.cfg.ReleaseDRBDMinors(instance.name)
6622
        raise errors.OpExecError("Errors during disk copy: %s" %
6623
                                 (",".join(errs),))
6624

    
6625
    instance.primary_node = target_node
6626
    self.cfg.Update(instance, feedback_fn)
6627

    
6628
    self.LogInfo("Removing the disks on the original node")
6629
    _RemoveDisks(self, instance, target_node=source_node)
6630

    
6631
    # Only start the instance if it's marked as up
6632
    if instance.admin_up:
6633
      self.LogInfo("Starting instance %s on node %s",
6634
                   instance.name, target_node)
6635

    
6636
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6637
                                           ignore_secondaries=True)
6638
      if not disks_ok:
6639
        _ShutdownInstanceDisks(self, instance)
6640
        raise errors.OpExecError("Can't activate the instance's disks")
6641

    
6642
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6643
      msg = result.fail_msg
6644
      if msg:
6645
        _ShutdownInstanceDisks(self, instance)
6646
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6647
                                 (instance.name, target_node, msg))
6648

    
6649

    
6650
class LUNodeMigrate(LogicalUnit):
6651
  """Migrate all instances from a node.
6652

6653
  """
6654
  HPATH = "node-migrate"
6655
  HTYPE = constants.HTYPE_NODE
6656
  REQ_BGL = False
6657

    
6658
  def CheckArguments(self):
6659
    pass
6660

    
6661
  def ExpandNames(self):
6662
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6663

    
6664
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6665
    self.needed_locks = {
6666
      locking.LEVEL_NODE: [self.op.node_name],
6667
      }
6668

    
6669
  def BuildHooksEnv(self):
6670
    """Build hooks env.
6671

6672
    This runs on the master, the primary and all the secondaries.
6673

6674
    """
6675
    return {
6676
      "NODE_NAME": self.op.node_name,
6677
      }
6678

    
6679
  def BuildHooksNodes(self):
6680
    """Build hooks nodes.
6681

6682
    """
6683
    nl = [self.cfg.GetMasterNode()]
6684
    return (nl, nl)
6685

    
6686
  def CheckPrereq(self):
6687
    pass
6688

    
6689
  def Exec(self, feedback_fn):
6690
    # Prepare jobs for migration instances
6691
    jobs = [
6692
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6693
                                 mode=self.op.mode,
6694
                                 live=self.op.live,
6695
                                 iallocator=self.op.iallocator,
6696
                                 target_node=self.op.target_node)]
6697
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6698
      ]
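    # Illustrative only: with two primary instances "inst1" and "inst2" on
    # this node, jobs would hold two single-opcode jobs, e.g.
    #   [[OpInstanceMigrate(instance_name="inst1", ...)],
    #    [OpInstanceMigrate(instance_name="inst2", ...)]]
    # i.e. one separate job per instance.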

    
6700
    # TODO: Run iallocator in this opcode and pass correct placement options to
6701
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6702
    # running the iallocator and the actual migration, a good consistency model
6703
    # will have to be found.
6704

    
6705
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6706
            frozenset([self.op.node_name]))
6707

    
6708
    return ResultWithJobs(jobs)
6709

    
6710

    
6711
class TLMigrateInstance(Tasklet):
6712
  """Tasklet class for instance migration.
6713

6714
  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
6733

6734
  """
  def __init__(self, lu, instance_name, cleanup=False,
6736
               failover=False, fallback=False,
6737
               ignore_consistency=False,
6738
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6739
    """Initializes this class.
6740

6741
    """
6742
    Tasklet.__init__(self, lu)
6743

    
6744
    # Parameters
6745
    self.instance_name = instance_name
6746
    self.cleanup = cleanup
6747
    self.live = False # will be overridden later
6748
    self.failover = failover
6749
    self.fallback = fallback
6750
    self.ignore_consistency = ignore_consistency
6751
    self.shutdown_timeout = shutdown_timeout
6752

    
6753
  def CheckPrereq(self):
6754
    """Check prerequisites.
6755

6756
    This checks that the instance is in the cluster.
6757

6758
    """
6759
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6760
    instance = self.cfg.GetInstanceInfo(instance_name)
6761
    assert instance is not None
6762
    self.instance = instance
6763

    
6764
    if (not self.cleanup and not instance.admin_up and not self.failover and
6765
        self.fallback):
6766
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6767
                      " to failover")
6768
      self.failover = True
6769

    
6770
    if instance.disk_template not in constants.DTS_MIRRORED:
6771
      if self.failover:
6772
        text = "failovers"
6773
      else:
6774
        text = "migrations"
6775
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6776
                                 " %s" % (instance.disk_template, text),
6777
                                 errors.ECODE_STATE)
6778

    
6779
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6780
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6781

    
6782
      if self.lu.op.iallocator:
6783
        self._RunAllocator()
6784
      else:
6785
        # We set self.target_node as it is required by
6786
        # BuildHooksEnv
6787
        self.target_node = self.lu.op.target_node
6788

    
6789
      # self.target_node is already populated, either directly or by the
6790
      # iallocator run
6791
      target_node = self.target_node
6792
      if self.target_node == instance.primary_node:
6793
        raise errors.OpPrereqError("Cannot migrate instance %s"
6794
                                   " to its primary (%s)" %
6795
                                   (instance.name, instance.primary_node))
6796

    
6797
      if len(self.lu.tasklets) == 1:
6798
        # It is safe to release locks only when we're the only tasklet
6799
        # in the LU
6800
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6801
                      keep=[instance.primary_node, self.target_node])
6802

    
6803
    else:
6804
      secondary_nodes = instance.secondary_nodes
6805
      if not secondary_nodes:
6806
        raise errors.ConfigurationError("No secondary node but using"
6807
                                        " %s disk template" %
6808
                                        instance.disk_template)
6809
      target_node = secondary_nodes[0]
6810
      if self.lu.op.iallocator or (self.lu.op.target_node and
6811
                                   self.lu.op.target_node != target_node):
6812
        if self.failover:
6813
          text = "failed over"
6814
        else:
6815
          text = "migrated"
6816
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6817
                                   " be %s to arbitrary nodes"
6818
                                   " (neither an iallocator nor a target"
6819
                                   " node can be passed)" %
6820
                                   (instance.disk_template, text),
6821
                                   errors.ECODE_INVAL)
6822

    
6823
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6824

    
6825
    # check memory requirements on the target node
6826
    if not self.failover or instance.admin_up:
6827
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6828
                           instance.name, i_be[constants.BE_MEMORY],
6829
                           instance.hypervisor)
6830
    else:
6831
      self.lu.LogInfo("Not checking memory on the secondary node as"
6832
                      " instance will not be started")
6833

    
6834
    # check bridge existence
6835
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6836

    
6837
    if not self.cleanup:
6838
      _CheckNodeNotDrained(self.lu, target_node)
6839
      if not self.failover:
6840
        result = self.rpc.call_instance_migratable(instance.primary_node,
6841
                                                   instance)
6842
        if result.fail_msg and self.fallback:
6843
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6844
                          " failover")
6845
          self.failover = True
6846
        else:
6847
          result.Raise("Can't migrate, please use failover",
6848
                       prereq=True, ecode=errors.ECODE_STATE)
6849

    
6850
    assert not (self.failover and self.cleanup)
6851

    
6852
    if not self.failover:
6853
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6854
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6855
                                   " parameters are accepted",
6856
                                   errors.ECODE_INVAL)
6857
      if self.lu.op.live is not None:
6858
        if self.lu.op.live:
6859
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6860
        else:
6861
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6862
        # reset the 'live' parameter to None so that repeated
6863
        # invocations of CheckPrereq do not raise an exception
6864
        self.lu.op.live = None
6865
      elif self.lu.op.mode is None:
6866
        # read the default value from the hypervisor
6867
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6868
                                                skip_globals=False)
6869
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6870

    
6871
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6872
    else:
6873
      # Failover is never live
6874
      self.live = False
6875

    
6876
  def _RunAllocator(self):
6877
    """Run the allocator based on input opcode.
6878

6879
    """
6880
    ial = IAllocator(self.cfg, self.rpc,
6881
                     mode=constants.IALLOCATOR_MODE_RELOC,
6882
                     name=self.instance_name,
6883
                     # TODO See why hail breaks with a single node below
6884
                     relocate_from=[self.instance.primary_node,
6885
                                    self.instance.primary_node],
6886
                     )
6887

    
6888
    ial.Run(self.lu.op.iallocator)
6889

    
6890
    if not ial.success:
6891
      raise errors.OpPrereqError("Can't compute nodes using"
6892
                                 " iallocator '%s': %s" %
6893
                                 (self.lu.op.iallocator, ial.info),
6894
                                 errors.ECODE_NORES)
6895
    if len(ial.result) != ial.required_nodes:
6896
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6897
                                 " of nodes (%s), required %s" %
6898
                                 (self.lu.op.iallocator, len(ial.result),
6899
                                  ial.required_nodes), errors.ECODE_FAULT)
6900
    self.target_node = ial.result[0]
6901
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6902
                 self.instance_name, self.lu.op.iallocator,
6903
                 utils.CommaJoin(ial.result))
6904

    
6905
  def _WaitUntilSync(self):
6906
    """Poll with custom rpc for disk sync.
6907

6908
    This uses our own step-based rpc call.
6909

6910
    """
6911
    self.feedback_fn("* wait until resync is done")
6912
    all_done = False
6913
    while not all_done:
6914
      all_done = True
6915
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6916
                                            self.nodes_ip,
6917
                                            self.instance.disks)
6918
      min_percent = 100
6919
      for node, nres in result.items():
6920
        nres.Raise("Cannot resync disks on node %s" % node)
6921
        node_done, node_percent = nres.payload
6922
        all_done = all_done and node_done
6923
        if node_percent is not None:
6924
          min_percent = min(min_percent, node_percent)
6925
      if not all_done:
6926
        if min_percent < 100:
6927
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6928
        time.sleep(2)
6929

    
6930
  def _EnsureSecondary(self, node):
6931
    """Demote a node to secondary.
6932

6933
    """
6934
    self.feedback_fn("* switching node %s to secondary mode" % node)
6935

    
6936
    for dev in self.instance.disks:
6937
      self.cfg.SetDiskID(dev, node)
6938

    
6939
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6940
                                          self.instance.disks)
6941
    result.Raise("Cannot change disk to secondary on node %s" % node)
6942

    
6943
  def _GoStandalone(self):
6944
    """Disconnect from the network.
6945

6946
    """
6947
    self.feedback_fn("* changing into standalone mode")
6948
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6949
                                               self.instance.disks)
6950
    for node, nres in result.items():
6951
      nres.Raise("Cannot disconnect disks node %s" % node)
6952

    
6953
  def _GoReconnect(self, multimaster):
6954
    """Reconnect to the network.
6955

6956
    """
6957
    if multimaster:
6958
      msg = "dual-master"
6959
    else:
6960
      msg = "single-master"
6961
    self.feedback_fn("* changing disks into %s mode" % msg)
6962
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6963
                                           self.instance.disks,
6964
                                           self.instance.name, multimaster)
6965
    for node, nres in result.items():
6966
      nres.Raise("Cannot change disks config on node %s" % node)
6967

    
6968
  def _ExecCleanup(self):
6969
    """Try to cleanup after a failed migration.
6970

6971
    The cleanup is done by:
6972
      - check that the instance is running only on one node
6973
        (and update the config if needed)
6974
      - change disks on its secondary node to secondary
6975
      - wait until disks are fully synchronized
6976
      - disconnect from the network
6977
      - change disks into single-master mode
6978
      - wait again until disks are fully synchronized
6979

6980
    """
6981
    instance = self.instance
6982
    target_node = self.target_node
6983
    source_node = self.source_node
6984

    
6985
    # check running on only one node
6986
    self.feedback_fn("* checking where the instance actually runs"
6987
                     " (if this hangs, the hypervisor might be in"
6988
                     " a bad state)")
6989
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6990
    for node, result in ins_l.items():
6991
      result.Raise("Can't contact node %s" % node)
6992

    
6993
    runningon_source = instance.name in ins_l[source_node].payload
6994
    runningon_target = instance.name in ins_l[target_node].payload
6995

    
6996
    if runningon_source and runningon_target:
6997
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6998
                               " or the hypervisor is confused; you will have"
6999
                               " to ensure manually that it runs only on one"
7000
                               " and restart this operation")
7001

    
7002
    if not (runningon_source or runningon_target):
7003
      raise errors.OpExecError("Instance does not seem to be running at all;"
7004
                               " in this case it's safer to repair by"
7005
                               " running 'gnt-instance stop' to ensure disk"
7006
                               " shutdown, and then restarting it")
7007

    
7008
    if runningon_target:
7009
      # the migration has actually succeeded, we need to update the config
7010
      self.feedback_fn("* instance running on secondary node (%s),"
7011
                       " updating config" % target_node)
7012
      instance.primary_node = target_node
7013
      self.cfg.Update(instance, self.feedback_fn)
7014
      demoted_node = source_node
7015
    else:
7016
      self.feedback_fn("* instance confirmed to be running on its"
7017
                       " primary node (%s)" % source_node)
7018
      demoted_node = target_node
7019

    
7020
    if instance.disk_template in constants.DTS_INT_MIRROR:
7021
      self._EnsureSecondary(demoted_node)
7022
      try:
7023
        self._WaitUntilSync()
7024
      except errors.OpExecError:
7025
        # we ignore here errors, since if the device is standalone, it
7026
        # won't be able to sync
7027
        pass
7028
      self._GoStandalone()
7029
      self._GoReconnect(False)
7030
      self._WaitUntilSync()
7031

    
7032
    self.feedback_fn("* done")
7033

    
7034
  def _RevertDiskStatus(self):
7035
    """Try to revert the disk status after a failed migration.
7036

7037
    """
7038
    target_node = self.target_node
7039
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7040
      return
7041

    
7042
    try:
7043
      self._EnsureSecondary(target_node)
7044
      self._GoStandalone()
7045
      self._GoReconnect(False)
7046
      self._WaitUntilSync()
7047
    except errors.OpExecError, err:
7048
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7049
                         " please try to recover the instance manually;"
7050
                         " error '%s'" % str(err))
7051

    
7052
  def _AbortMigration(self):
7053
    """Call the hypervisor code to abort a started migration.
7054

7055
    """
7056
    instance = self.instance
7057
    target_node = self.target_node
7058
    migration_info = self.migration_info
7059

    
7060
    abort_result = self.rpc.call_finalize_migration(target_node,
7061
                                                    instance,
7062
                                                    migration_info,
7063
                                                    False)
7064
    abort_msg = abort_result.fail_msg
7065
    if abort_msg:
7066
      logging.error("Aborting migration failed on target node %s: %s",
7067
                    target_node, abort_msg)
7068
      # Don't raise an exception here, as we still have to try to revert the
7069
      # disk status, even if this step failed.
7070

    
7071
  def _ExecMigration(self):
7072
    """Migrate an instance.
7073

7074
    The migration is done by:
7075
      - change the disks into dual-master mode
7076
      - wait until disks are fully synchronized again
7077
      - migrate the instance
7078
      - change disks on the new secondary node (the old primary) to secondary
7079
      - wait until disks are fully synchronized
7080
      - change disks into single-master mode
7081

7082
    """
7083
    instance = self.instance
7084
    target_node = self.target_node
7085
    source_node = self.source_node
7086

    
7087
    self.feedback_fn("* checking disk consistency between source and target")
7088
    for dev in instance.disks:
7089
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7090
        raise errors.OpExecError("Disk %s is degraded or not fully"
7091
                                 " synchronized on target node,"
7092
                                 " aborting migration" % dev.iv_name)
7093

    
7094
    # First get the migration information from the remote node
7095
    result = self.rpc.call_migration_info(source_node, instance)
7096
    msg = result.fail_msg
7097
    if msg:
7098
      log_err = ("Failed fetching source migration information from %s: %s" %
7099
                 (source_node, msg))
7100
      logging.error(log_err)
7101
      raise errors.OpExecError(log_err)
7102

    
7103
    self.migration_info = migration_info = result.payload
7104

    
7105
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7106
      # Then switch the disks to master/master mode
7107
      self._EnsureSecondary(target_node)
7108
      self._GoStandalone()
7109
      self._GoReconnect(True)
7110
      self._WaitUntilSync()
7111

    
7112
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7113
    result = self.rpc.call_accept_instance(target_node,
7114
                                           instance,
7115
                                           migration_info,
7116
                                           self.nodes_ip[target_node])
7117

    
7118
    msg = result.fail_msg
7119
    if msg:
7120
      logging.error("Instance pre-migration failed, trying to revert"
7121
                    " disk status: %s", msg)
7122
      self.feedback_fn("Pre-migration failed, aborting")
7123
      self._AbortMigration()
7124
      self._RevertDiskStatus()
7125
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7126
                               (instance.name, msg))
7127

    
7128
    self.feedback_fn("* migrating instance to %s" % target_node)
7129
    result = self.rpc.call_instance_migrate(source_node, instance,
7130
                                            self.nodes_ip[target_node],
7131
                                            self.live)
7132
    msg = result.fail_msg
7133
    if msg:
7134
      logging.error("Instance migration failed, trying to revert"
7135
                    " disk status: %s", msg)
7136
      self.feedback_fn("Migration failed, aborting")
7137
      self._AbortMigration()
7138
      self._RevertDiskStatus()
7139
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7140
                               (instance.name, msg))
7141

    
7142
    instance.primary_node = target_node
7143
    # distribute new instance config to the other nodes
7144
    self.cfg.Update(instance, self.feedback_fn)
7145

    
7146
    result = self.rpc.call_finalize_migration(target_node,
7147
                                              instance,
7148
                                              migration_info,
7149
                                              True)
7150
    msg = result.fail_msg
7151
    if msg:
7152
      logging.error("Instance migration succeeded, but finalization failed:"
7153
                    " %s", msg)
7154
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7155
                               msg)
7156

    
7157
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7158
      self._EnsureSecondary(source_node)
7159
      self._WaitUntilSync()
7160
      self._GoStandalone()
7161
      self._GoReconnect(False)
7162
      self._WaitUntilSync()
7163

    
7164
    self.feedback_fn("* done")
7165

    
7166
  def _ExecFailover(self):
7167
    """Failover an instance.
7168

7169
    The failover is done by shutting it down on its present node and
7170
    starting it on the secondary.
7171

7172
    """
7173
    instance = self.instance
7174
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7175

    
7176
    source_node = instance.primary_node
7177
    target_node = self.target_node
7178

    
7179
    if instance.admin_up:
7180
      self.feedback_fn("* checking disk consistency between source and target")
7181
      for dev in instance.disks:
7182
        # for drbd, these are drbd over lvm
7183
        if not _CheckDiskConsistency(self, dev, target_node, False):
7184
          if not self.ignore_consistency:
7185
            raise errors.OpExecError("Disk %s is degraded on target node,"
7186
                                     " aborting failover" % dev.iv_name)
7187
    else:
7188
      self.feedback_fn("* not checking disk consistency as instance is not"
7189
                       " running")
7190

    
7191
    self.feedback_fn("* shutting down instance on source node")
7192
    logging.info("Shutting down instance %s on node %s",
7193
                 instance.name, source_node)
7194

    
7195
    result = self.rpc.call_instance_shutdown(source_node, instance,
7196
                                             self.shutdown_timeout)
7197
    msg = result.fail_msg
7198
    if msg:
7199
      if self.ignore_consistency or primary_node.offline:
7200
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7201
                           " proceeding anyway; please make sure node"
7202
                           " %s is down; error details: %s",
7203
                           instance.name, source_node, source_node, msg)
7204
      else:
7205
        raise errors.OpExecError("Could not shutdown instance %s on"
7206
                                 " node %s: %s" %
7207
                                 (instance.name, source_node, msg))
7208

    
7209
    self.feedback_fn("* deactivating the instance's disks on source node")
7210
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
7211
      raise errors.OpExecError("Can't shut down the instance's disks.")
7212

    
7213
    instance.primary_node = target_node
7214
    # distribute new instance config to the other nodes
7215
    self.cfg.Update(instance, self.feedback_fn)
7216

    
7217
    # Only start the instance if it's marked as up
7218
    if instance.admin_up:
7219
      self.feedback_fn("* activating the instance's disks on target node")
7220
      logging.info("Starting instance %s on node %s",
7221
                   instance.name, target_node)
7222

    
7223
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7224
                                           ignore_secondaries=True)
7225
      if not disks_ok:
7226
        _ShutdownInstanceDisks(self, instance)
7227
        raise errors.OpExecError("Can't activate the instance's disks")
7228

    
7229
      self.feedback_fn("* starting the instance on the target node")
7230
      result = self.rpc.call_instance_start(target_node, instance, None, None)
7231
      msg = result.fail_msg
7232
      if msg:
7233
        _ShutdownInstanceDisks(self, instance)
7234
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7235
                                 (instance.name, target_node, msg))
7236

    
7237
  def Exec(self, feedback_fn):
7238
    """Perform the migration.
7239

7240
    """
7241
    self.feedback_fn = feedback_fn
7242
    self.source_node = self.instance.primary_node
7243

    
7244
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7245
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7246
      self.target_node = self.instance.secondary_nodes[0]
7247
      # Otherwise self.target_node has been populated either
7248
      # directly, or through an iallocator.
7249

    
7250
    self.all_nodes = [self.source_node, self.target_node]
7251
    self.nodes_ip = {
7252
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7253
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7254
      }
7255

    
7256
    if self.failover:
7257
      feedback_fn("Failover instance %s" % self.instance.name)
7258
      self._ExecFailover()
7259
    else:
7260
      feedback_fn("Migrating instance %s" % self.instance.name)
7261

    
7262
      if self.cleanup:
7263
        return self._ExecCleanup()
7264
      else:
7265
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
7269
                    info, force_open):
7270
  """Create a tree of block devices on a given node.
7271

7272
  If this device type has to be created on secondaries, create it and
7273
  all its children.
7274

7275
  If not, just recurse to children keeping the same 'force' value.
7276

7277
  @param lu: the lu on whose behalf we execute
7278
  @param node: the node on which to create the device
7279
  @type instance: L{objects.Instance}
7280
  @param instance: the instance which owns the device
7281
  @type device: L{objects.Disk}
7282
  @param device: the device to create
7283
  @type force_create: boolean
7284
  @param force_create: whether to force creation of this device; this
7285
      will be changed to True whenever we find a device which has
7286
      CreateOnSecondary() attribute
7287
  @param info: the extra 'metadata' we should attach to the device
7288
      (this will be represented as a LVM tag)
7289
  @type force_open: boolean
7290
  @param force_open: this parameter will be passes to the
7291
      L{backend.BlockdevCreate} function where it specifies
7292
      whether we run on primary or not, and it affects both
7293
      the child assembly and the device own Open() execution
7294

7295
  """
7296
  if device.CreateOnSecondary():
7297
    force_create = True
7298

    
7299
  if device.children:
7300
    for child in device.children:
7301
      _CreateBlockDev(lu, node, instance, child, force_create,
7302
                      info, force_open)
7303

    
7304
  if not force_create:
7305
    return
7306

    
7307
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7308

    
7309

    
7310
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7311
  """Create a single block device on a given node.
7312

7313
  This will not recurse over children of the device, so they must be
7314
  created in advance.
7315

7316
  @param lu: the lu on whose behalf we execute
7317
  @param node: the node on which to create the device
7318
  @type instance: L{objects.Instance}
7319
  @param instance: the instance which owns the device
7320
  @type device: L{objects.Disk}
7321
  @param device: the device to create
7322
  @param info: the extra 'metadata' we should attach to the device
7323
      (this will be represented as a LVM tag)
7324
  @type force_open: boolean
7325
  @param force_open: this parameter will be passed to the
7326
      L{backend.BlockdevCreate} function where it specifies
7327
      whether we run on primary or not, and it affects both
7328
      the child assembly and the device's own Open() execution
7329

7330
  """
7331
  lu.cfg.SetDiskID(device, node)
7332
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7333
                                       instance.name, force_open, info)
7334
  result.Raise("Can't create block device %s on"
7335
               " node %s for instance %s" % (device, node, instance.name))
7336
  if device.physical_id is None:
7337
    device.physical_id = result.payload
7338

    
7339

    
7340
def _GenerateUniqueNames(lu, exts):
7341
  """Generate a suitable LV name.
7342

7343
  This will generate a logical volume name for the given instance.
7344

7345
  """
7346
  results = []
7347
  for val in exts:
7348
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7349
    results.append("%s%s" % (new_id, val))
7350
  return results
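# Illustrative example (not part of the original flow): a call such as
# _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"]) returns the two
# extensions each prefixed by a freshly generated unique ID taken from the
# configuration, so the prefixes of the two resulting names differ.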
7351

    
7352

    
7353
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7354
                         iv_name, p_minor, s_minor):
7355
  """Generate a drbd8 device complete with its children.
7356

7357
  """
7358
  assert len(vgnames) == len(names) == 2
7359
  port = lu.cfg.AllocatePort()
7360
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7361
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7362
                          logical_id=(vgnames[0], names[0]))
7363
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7364
                          logical_id=(vgnames[1], names[1]))
7365
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7366
                          logical_id=(primary, secondary, port,
7367
                                      p_minor, s_minor,
7368
                                      shared_secret),
7369
                          children=[dev_data, dev_meta],
7370
                          iv_name=iv_name)
7371
  return drbd_dev
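# Illustrative sketch of the value returned above: for a 1024 MB disk the
# result is a single LD_DRBD8 device of size 1024 with two LD_LV children,
# a data volume (vgnames[0], names[0]) of size 1024 and a metadata volume
# (vgnames[1], names[1]) of size 128; the DRBD logical_id carries
# (primary, secondary, port, p_minor, s_minor, shared_secret).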
7372

    
7373

    
7374
def _GenerateDiskTemplate(lu, template_name,
7375
                          instance_name, primary_node,
7376
                          secondary_nodes, disk_info,
7377
                          file_storage_dir, file_driver,
7378
                          base_index, feedback_fn):
7379
  """Generate the entire disk layout for a given template type.
7380

7381
  """
7382
  #TODO: compute space requirements
7383

    
7384
  vgname = lu.cfg.GetVGName()
7385
  disk_count = len(disk_info)
7386
  disks = []
7387
  if template_name == constants.DT_DISKLESS:
7388
    pass
7389
  elif template_name == constants.DT_PLAIN:
7390
    if len(secondary_nodes) != 0:
7391
      raise errors.ProgrammerError("Wrong template configuration")
7392

    
7393
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7394
                                      for i in range(disk_count)])
7395
    for idx, disk in enumerate(disk_info):
7396
      disk_index = idx + base_index
7397
      vg = disk.get(constants.IDISK_VG, vgname)
7398
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7399
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7400
                              size=disk[constants.IDISK_SIZE],
7401
                              logical_id=(vg, names[idx]),
7402
                              iv_name="disk/%d" % disk_index,
7403
                              mode=disk[constants.IDISK_MODE])
7404
      disks.append(disk_dev)
7405
  elif template_name == constants.DT_DRBD8:
7406
    if len(secondary_nodes) != 1:
7407
      raise errors.ProgrammerError("Wrong template configuration")
7408
    remote_node = secondary_nodes[0]
7409
    minors = lu.cfg.AllocateDRBDMinor(
7410
      [primary_node, remote_node] * len(disk_info), instance_name)
7411

    
7412
    names = []
7413
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7414
                                               for i in range(disk_count)]):
7415
      names.append(lv_prefix + "_data")
7416
      names.append(lv_prefix + "_meta")
7417
    for idx, disk in enumerate(disk_info):
7418
      disk_index = idx + base_index
7419
      data_vg = disk.get(constants.IDISK_VG, vgname)
7420
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7421
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7422
                                      disk[constants.IDISK_SIZE],
7423
                                      [data_vg, meta_vg],
7424
                                      names[idx * 2:idx * 2 + 2],
7425
                                      "disk/%d" % disk_index,
7426
                                      minors[idx * 2], minors[idx * 2 + 1])
7427
      disk_dev.mode = disk[constants.IDISK_MODE]
7428
      disks.append(disk_dev)
7429
  elif template_name == constants.DT_FILE:
7430
    if len(secondary_nodes) != 0:
7431
      raise errors.ProgrammerError("Wrong template configuration")
7432

    
7433
    opcodes.RequireFileStorage()
7434

    
7435
    for idx, disk in enumerate(disk_info):
7436
      disk_index = idx + base_index
7437
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7438
                              size=disk[constants.IDISK_SIZE],
7439
                              iv_name="disk/%d" % disk_index,
7440
                              logical_id=(file_driver,
7441
                                          "%s/disk%d" % (file_storage_dir,
7442
                                                         disk_index)),
7443
                              mode=disk[constants.IDISK_MODE])
7444
      disks.append(disk_dev)
7445
  elif template_name == constants.DT_SHARED_FILE:
7446
    if len(secondary_nodes) != 0:
7447
      raise errors.ProgrammerError("Wrong template configuration")
7448

    
7449
    opcodes.RequireSharedFileStorage()
7450

    
7451
    for idx, disk in enumerate(disk_info):
7452
      disk_index = idx + base_index
7453
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7454
                              size=disk[constants.IDISK_SIZE],
7455
                              iv_name="disk/%d" % disk_index,
7456
                              logical_id=(file_driver,
7457
                                          "%s/disk%d" % (file_storage_dir,
7458
                                                         disk_index)),
7459
                              mode=disk[constants.IDISK_MODE])
7460
      disks.append(disk_dev)
7461
  elif template_name == constants.DT_BLOCK:
7462
    if len(secondary_nodes) != 0:
7463
      raise errors.ProgrammerError("Wrong template configuration")
7464

    
7465
    for idx, disk in enumerate(disk_info):
7466
      disk_index = idx + base_index
7467
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7468
                              size=disk[constants.IDISK_SIZE],
7469
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7470
                                          disk[constants.IDISK_ADOPT]),
7471
                              iv_name="disk/%d" % disk_index,
7472
                              mode=disk[constants.IDISK_MODE])
7473
      disks.append(disk_dev)
7474

    
7475
  else:
7476
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7477
  return disks
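# Illustrative example: for constants.DT_PLAIN, base_index 0 and a single
# disk_info entry of size 1024, the function returns one LD_LV objects.Disk
# with logical_id (vgname, "<unique-id>.disk0"), iv_name "disk/0" and the
# requested access mode; "<unique-id>" stands for the generated name prefix.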
7478

    
7479

    
7480
def _GetInstanceInfoText(instance):
7481
  """Compute that text that should be added to the disk's metadata.
7482

7483
  """
7484
  return "originstname+%s" % instance.name
7485

    
7486

    
7487
def _CalcEta(time_taken, written, total_size):
7488
  """Calculates the ETA based on size written and total size.
7489

7490
  @param time_taken: The time taken so far
7491
  @param written: amount written so far
7492
  @param total_size: The total size of data to be written
7493
  @return: The remaining time in seconds
7494

7495
  """
7496
  avg_time = time_taken / float(written)
7497
  return (total_size - written) * avg_time
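# Worked example (illustrative): if 512 MB out of 2048 MB were written in
# 30 seconds, _CalcEta(30.0, 512, 2048) returns
# (2048 - 512) * (30.0 / 512) == 90.0 seconds remaining.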
7498

    
7499

    
7500
def _WipeDisks(lu, instance):
7501
  """Wipes instance disks.
7502

7503
  @type lu: L{LogicalUnit}
7504
  @param lu: the logical unit on whose behalf we execute
7505
  @type instance: L{objects.Instance}
7506
  @param instance: the instance whose disks we should create
7507
  @return: the success of the wipe
7508

7509
  """
7510
  node = instance.primary_node
7511

    
7512
  for device in instance.disks:
7513
    lu.cfg.SetDiskID(device, node)
7514

    
7515
  logging.info("Pause sync of instance %s disks", instance.name)
7516
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7517

    
7518
  for idx, success in enumerate(result.payload):
7519
    if not success:
7520
      logging.warn("pause-sync of instance %s for disks %d failed",
7521
                   instance.name, idx)
7522

    
7523
  try:
7524
    for idx, device in enumerate(instance.disks):
7525
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7526
      # MAX_WIPE_CHUNK at max
7527
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7528
                            constants.MIN_WIPE_CHUNK_PERCENT)
7529
      # we _must_ make this an int, otherwise rounding errors will
7530
      # occur
7531
      wipe_chunk_size = int(wipe_chunk_size)
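      # Worked example (illustrative; assuming, e.g., MIN_WIPE_CHUNK_PERCENT
      # of 10 and MAX_WIPE_CHUNK of 1024): a 2048 MB disk would be wiped in
      # chunks of int(min(1024, 2048 / 100.0 * 10)) == 204 MB, while a very
      # large disk is capped at the 1024 MB maximum chunk size.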
7532

    
7533
      lu.LogInfo("* Wiping disk %d", idx)
7534
      logging.info("Wiping disk %d for instance %s, node %s using"
7535
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7536

    
7537
      offset = 0
7538
      size = device.size
7539
      last_output = 0
7540
      start_time = time.time()
7541

    
7542
      while offset < size:
7543
        wipe_size = min(wipe_chunk_size, size - offset)
7544
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7545
                      idx, offset, wipe_size)
7546
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7547
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7548
                     (idx, offset, wipe_size))
7549
        now = time.time()
7550
        offset += wipe_size
7551
        if now - last_output >= 60:
7552
          eta = _CalcEta(now - start_time, offset, size)
7553
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7554
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7555
          last_output = now
7556
  finally:
7557
    logging.info("Resume sync of instance %s disks", instance.name)
7558

    
7559
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7560

    
7561
    for idx, success in enumerate(result.payload):
7562
      if not success:
7563
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7564
                      " look at the status and troubleshoot the issue", idx)
7565
        logging.warn("resume-sync of instance %s for disks %d failed",
7566
                     instance.name, idx)
7567

    
7568

    
7569
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7570
  """Create all disks for an instance.
7571

7572
  This abstracts away some work from AddInstance.
7573

7574
  @type lu: L{LogicalUnit}
7575
  @param lu: the logical unit on whose behalf we execute
7576
  @type instance: L{objects.Instance}
7577
  @param instance: the instance whose disks we should create
7578
  @type to_skip: list
7579
  @param to_skip: list of indices to skip
7580
  @type target_node: string
7581
  @param target_node: if passed, overrides the target node for creation
7582
  @rtype: boolean
7583
  @return: the success of the creation
7584

7585
  """
7586
  info = _GetInstanceInfoText(instance)
7587
  if target_node is None:
7588
    pnode = instance.primary_node
7589
    all_nodes = instance.all_nodes
7590
  else:
7591
    pnode = target_node
7592
    all_nodes = [pnode]
7593

    
7594
  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7595
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7596
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7597

    
7598
    result.Raise("Failed to create directory '%s' on"
7599
                 " node %s" % (file_storage_dir, pnode))
7600

    
7601
  # Note: this needs to be kept in sync with adding of disks in
7602
  # LUInstanceSetParams
7603
  for idx, device in enumerate(instance.disks):
7604
    if to_skip and idx in to_skip:
7605
      continue
7606
    logging.info("Creating volume %s for instance %s",
7607
                 device.iv_name, instance.name)
7608
    #HARDCODE
7609
    for node in all_nodes:
7610
      f_create = node == pnode
7611
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7612

    
7613

    
7614
def _RemoveDisks(lu, instance, target_node=None):
7615
  """Remove all disks for an instance.
7616

7617
  This abstracts away some work from `AddInstance()` and
7618
  `RemoveInstance()`. Note that in case some of the devices couldn't
7619
  be removed, the removal will continue with the other ones (compare
7620
  with `_CreateDisks()`).
7621

7622
  @type lu: L{LogicalUnit}
7623
  @param lu: the logical unit on whose behalf we execute
7624
  @type instance: L{objects.Instance}
7625
  @param instance: the instance whose disks we should remove
7626
  @type target_node: string
7627
  @param target_node: used to override the node on which to remove the disks
7628
  @rtype: boolean
7629
  @return: the success of the removal
7630

7631
  """
7632
  logging.info("Removing block devices for instance %s", instance.name)
7633

    
7634
  all_result = True
7635
  for device in instance.disks:
7636
    if target_node:
7637
      edata = [(target_node, device)]
7638
    else:
7639
      edata = device.ComputeNodeTree(instance.primary_node)
7640
    for node, disk in edata:
7641
      lu.cfg.SetDiskID(disk, node)
7642
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7643
      if msg:
7644
        lu.LogWarning("Could not remove block device %s on node %s,"
7645
                      " continuing anyway: %s", device.iv_name, node, msg)
7646
        all_result = False
7647

    
7648
  if instance.disk_template == constants.DT_FILE:
7649
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7650
    if target_node:
7651
      tgt = target_node
7652
    else:
7653
      tgt = instance.primary_node
7654
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7655
    if result.fail_msg:
7656
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7657
                    file_storage_dir, instance.primary_node, result.fail_msg)
7658
      all_result = False
7659

    
7660
  return all_result
7661

    
7662

    
7663
def _ComputeDiskSizePerVG(disk_template, disks):
7664
  """Compute disk size requirements in the volume group
7665

7666
  """
7667
  def _compute(disks, payload):
7668
    """Universal algorithm.
7669

7670
    """
7671
    vgs = {}
7672
    for disk in disks:
7673
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload
    return vgs
7677

    
7678
  # Required free disk space as a function of disk and swap space
7679
  req_size_dict = {
7680
    constants.DT_DISKLESS: {},
7681
    constants.DT_PLAIN: _compute(disks, 0),
7682
    # 128 MB are added for drbd metadata for each disk
7683
    constants.DT_DRBD8: _compute(disks, 128),
7684
    constants.DT_FILE: {},
7685
    constants.DT_SHARED_FILE: {},
7686
  }
7687

    
7688
  if disk_template not in req_size_dict:
7689
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7690
                                 " is unknown" %  disk_template)
7691

    
7692
  return req_size_dict[disk_template]
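# Worked example (illustrative): for constants.DT_DRBD8 and two disks of
# 1024 MB and 2048 MB in the same volume group "xenvg", the result is
# {"xenvg": 3328}, since every disk contributes its size plus the 128 MB
# DRBD metadata payload.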
7693

    
7694

    
7695
def _ComputeDiskSize(disk_template, disks):
7696
  """Compute disk size requirements in the volume group
7697

7698
  """
7699
  # Required free disk space as a function of disk and swap space
7700
  req_size_dict = {
7701
    constants.DT_DISKLESS: None,
7702
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7703
    # 128 MB are added for drbd metadata for each disk
7704
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7705
    constants.DT_FILE: None,
7706
    constants.DT_SHARED_FILE: 0,
7707
    constants.DT_BLOCK: 0,
7708
  }
7709

    
7710
  if disk_template not in req_size_dict:
7711
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7712
                                 " is unknown" %  disk_template)
7713

    
7714
  return req_size_dict[disk_template]
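# Worked example (illustrative): two disks of 1024 MB and 2048 MB require
# 3072 MB for constants.DT_PLAIN and 3072 + 2 * 128 == 3328 MB for
# constants.DT_DRBD8, while DT_DISKLESS and DT_FILE have no volume group
# requirement (None).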
7715

    
7716

    
7717
def _FilterVmNodes(lu, nodenames):
7718
  """Filters out non-vm_capable nodes from a list.
7719

7720
  @type lu: L{LogicalUnit}
7721
  @param lu: the logical unit for which we check
7722
  @type nodenames: list
7723
  @param nodenames: the list of nodes on which we should check
7724
  @rtype: list
7725
  @return: the list of vm-capable nodes
7726

7727
  """
7728
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7729
  return [name for name in nodenames if name not in vm_nodes]
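# Illustrative example: if "node3.example.com" is the only node flagged as
# not vm_capable, _FilterVmNodes(lu, ["node1.example.com",
# "node3.example.com"]) returns ["node1.example.com"].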
7730

    
7731

    
7732
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7733
  """Hypervisor parameter validation.
7734

7735
  This function abstracts the hypervisor parameter validation to be
7736
  used in both instance create and instance modify.
7737

7738
  @type lu: L{LogicalUnit}
7739
  @param lu: the logical unit for which we check
7740
  @type nodenames: list
7741
  @param nodenames: the list of nodes on which we should check
7742
  @type hvname: string
7743
  @param hvname: the name of the hypervisor we should use
7744
  @type hvparams: dict
7745
  @param hvparams: the parameters which we need to check
7746
  @raise errors.OpPrereqError: if the parameters are not valid
7747

7748
  """
7749
  nodenames = _FilterVmNodes(lu, nodenames)
7750
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7751
                                                  hvname,
7752
                                                  hvparams)
7753
  for node in nodenames:
7754
    info = hvinfo[node]
7755
    if info.offline:
7756
      continue
7757
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7758

    
7759

    
7760
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7761
  """OS parameters validation.
7762

7763
  @type lu: L{LogicalUnit}
7764
  @param lu: the logical unit for which we check
7765
  @type required: boolean
7766
  @param required: whether the validation should fail if the OS is not
7767
      found
7768
  @type nodenames: list
7769
  @param nodenames: the list of nodes on which we should check
7770
  @type osname: string
7771
  @param osname: the name of the hypervisor we should use
7772
  @type osparams: dict
7773
  @param osparams: the parameters which we need to check
7774
  @raise errors.OpPrereqError: if the parameters are not valid
7775

7776
  """
7777
  nodenames = _FilterVmNodes(lu, nodenames)
7778
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7779
                                   [constants.OS_VALIDATE_PARAMETERS],
7780
                                   osparams)
7781
  for node, nres in result.items():
7782
    # we don't check for offline cases since this should be run only
7783
    # against the master node and/or an instance's nodes
7784
    nres.Raise("OS Parameters validation failed on node %s" % node)
7785
    if not nres.payload:
7786
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7787
                 osname, node)
7788

    
7789

    
7790
class LUInstanceCreate(LogicalUnit):
7791
  """Create an instance.
7792

7793
  """
7794
  HPATH = "instance-add"
7795
  HTYPE = constants.HTYPE_INSTANCE
7796
  REQ_BGL = False
7797

    
7798
  def CheckArguments(self):
7799
    """Check arguments.
7800

7801
    """
7802
    # do not require name_check to ease forward/backward compatibility
7803
    # for tools
7804
    if self.op.no_install and self.op.start:
7805
      self.LogInfo("No-installation mode selected, disabling startup")
7806
      self.op.start = False
7807
    # validate/normalize the instance name
7808
    self.op.instance_name = \
7809
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7810

    
7811
    if self.op.ip_check and not self.op.name_check:
7812
      # TODO: make the ip check more flexible and not depend on the name check
7813
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7814
                                 " check", errors.ECODE_INVAL)
7815

    
7816
    # check nics' parameter names
7817
    for nic in self.op.nics:
7818
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7819

    
7820
    # check disks. parameter names and consistent adopt/no-adopt strategy
7821
    has_adopt = has_no_adopt = False
7822
    for disk in self.op.disks:
7823
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7824
      if constants.IDISK_ADOPT in disk:
7825
        has_adopt = True
7826
      else:
7827
        has_no_adopt = True
7828
    if has_adopt and has_no_adopt:
7829
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7830
                                 errors.ECODE_INVAL)
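    # Illustrative example of the rule enforced above: a request with
    # disks=[{constants.IDISK_SIZE: 1024}, {constants.IDISK_ADOPT: "lv1"}]
    # mixes adopted and newly created disks and is rejected; either every
    # disk names an adoption source or none does ("lv1" is a made-up name).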
7831
    if has_adopt:
7832
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7833
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7834
                                   " '%s' disk template" %
7835
                                   self.op.disk_template,
7836
                                   errors.ECODE_INVAL)
7837
      if self.op.iallocator is not None:
7838
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7839
                                   " iallocator script", errors.ECODE_INVAL)
7840
      if self.op.mode == constants.INSTANCE_IMPORT:
7841
        raise errors.OpPrereqError("Disk adoption not allowed for"
7842
                                   " instance import", errors.ECODE_INVAL)
7843
    else:
7844
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7845
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7846
                                   " but no 'adopt' parameter given" %
7847
                                   self.op.disk_template,
7848
                                   errors.ECODE_INVAL)
7849

    
7850
    self.adopt_disks = has_adopt
7851

    
7852
    # instance name verification
7853
    if self.op.name_check:
7854
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7855
      self.op.instance_name = self.hostname1.name
7856
      # used in CheckPrereq for ip ping check
7857
      self.check_ip = self.hostname1.ip
7858
    else:
7859
      self.check_ip = None
7860

    
7861
    # file storage checks
7862
    if (self.op.file_driver and
7863
        not self.op.file_driver in constants.FILE_DRIVER):
7864
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7865
                                 self.op.file_driver, errors.ECODE_INVAL)
7866

    
7867
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7868
      raise errors.OpPrereqError("File storage directory path not absolute",
7869
                                 errors.ECODE_INVAL)
7870

    
7871
    ### Node/iallocator related checks
7872
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7873

    
7874
    if self.op.pnode is not None:
7875
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7876
        if self.op.snode is None:
7877
          raise errors.OpPrereqError("The networked disk templates need"
7878
                                     " a mirror node", errors.ECODE_INVAL)
7879
      elif self.op.snode:
7880
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7881
                        " template")
7882
        self.op.snode = None
7883

    
7884
    self._cds = _GetClusterDomainSecret()
7885

    
7886
    if self.op.mode == constants.INSTANCE_IMPORT:
7887
      # On import force_variant must be True, because if we forced it at
7888
      # initial install, our only chance when importing it back is that it
7889
      # works again!
7890
      self.op.force_variant = True
7891

    
7892
      if self.op.no_install:
7893
        self.LogInfo("No-installation mode has no effect during import")
7894

    
7895
    elif self.op.mode == constants.INSTANCE_CREATE:
7896
      if self.op.os_type is None:
7897
        raise errors.OpPrereqError("No guest OS specified",
7898
                                   errors.ECODE_INVAL)
7899
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7900
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7901
                                   " installation" % self.op.os_type,
7902
                                   errors.ECODE_STATE)
7903
      if self.op.disk_template is None:
7904
        raise errors.OpPrereqError("No disk template specified",
7905
                                   errors.ECODE_INVAL)
7906

    
7907
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7908
      # Check handshake to ensure both clusters have the same domain secret
7909
      src_handshake = self.op.source_handshake
7910
      if not src_handshake:
7911
        raise errors.OpPrereqError("Missing source handshake",
7912
                                   errors.ECODE_INVAL)
7913

    
7914
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7915
                                                           src_handshake)
7916
      if errmsg:
7917
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7918
                                   errors.ECODE_INVAL)
7919

    
7920
      # Load and check source CA
7921
      self.source_x509_ca_pem = self.op.source_x509_ca
7922
      if not self.source_x509_ca_pem:
7923
        raise errors.OpPrereqError("Missing source X509 CA",
7924
                                   errors.ECODE_INVAL)
7925

    
7926
      try:
7927
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7928
                                                    self._cds)
7929
      except OpenSSL.crypto.Error, err:
7930
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7931
                                   (err, ), errors.ECODE_INVAL)
7932

    
7933
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7934
      if errcode is not None:
7935
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7936
                                   errors.ECODE_INVAL)
7937

    
7938
      self.source_x509_ca = cert
7939

    
7940
      src_instance_name = self.op.source_instance_name
7941
      if not src_instance_name:
7942
        raise errors.OpPrereqError("Missing source instance name",
7943
                                   errors.ECODE_INVAL)
7944

    
7945
      self.source_instance_name = \
7946
          netutils.GetHostname(name=src_instance_name).name
7947

    
7948
    else:
7949
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7950
                                 self.op.mode, errors.ECODE_INVAL)
7951

    
7952
  def ExpandNames(self):
7953
    """ExpandNames for CreateInstance.
7954

7955
    Figure out the right locks for instance creation.
7956

7957
    """
7958
    self.needed_locks = {}
7959

    
7960
    instance_name = self.op.instance_name
7961
    # this is just a preventive check, but someone might still add this
7962
    # instance in the meantime, and creation will fail at lock-add time
7963
    if instance_name in self.cfg.GetInstanceList():
7964
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7965
                                 instance_name, errors.ECODE_EXISTS)
7966

    
7967
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7968

    
7969
    if self.op.iallocator:
7970
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7971
    else:
7972
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7973
      nodelist = [self.op.pnode]
7974
      if self.op.snode is not None:
7975
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7976
        nodelist.append(self.op.snode)
7977
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7978

    
7979
    # in case of import lock the source node too
7980
    if self.op.mode == constants.INSTANCE_IMPORT:
7981
      src_node = self.op.src_node
7982
      src_path = self.op.src_path
7983

    
7984
      if src_path is None:
7985
        self.op.src_path = src_path = self.op.instance_name
7986

    
7987
      if src_node is None:
7988
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7989
        self.op.src_node = None
7990
        if os.path.isabs(src_path):
7991
          raise errors.OpPrereqError("Importing an instance from an absolute"
7992
                                     " path requires a source node option",
7993
                                     errors.ECODE_INVAL)
7994
      else:
7995
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7996
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7997
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7998
        if not os.path.isabs(src_path):
7999
          self.op.src_path = src_path = \
8000
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8001

    
8002
  def _RunAllocator(self):
8003
    """Run the allocator based on input opcode.
8004

8005
    """
8006
    nics = [n.ToDict() for n in self.nics]
8007
    ial = IAllocator(self.cfg, self.rpc,
8008
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8009
                     name=self.op.instance_name,
8010
                     disk_template=self.op.disk_template,
8011
                     tags=self.op.tags,
8012
                     os=self.op.os_type,
8013
                     vcpus=self.be_full[constants.BE_VCPUS],
8014
                     memory=self.be_full[constants.BE_MEMORY],
8015
                     disks=self.disks,
8016
                     nics=nics,
8017
                     hypervisor=self.op.hypervisor,
8018
                     )
8019

    
8020
    ial.Run(self.op.iallocator)
8021

    
8022
    if not ial.success:
8023
      raise errors.OpPrereqError("Can't compute nodes using"
8024
                                 " iallocator '%s': %s" %
8025
                                 (self.op.iallocator, ial.info),
8026
                                 errors.ECODE_NORES)
8027
    if len(ial.result) != ial.required_nodes:
8028
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8029
                                 " of nodes (%s), required %s" %
8030
                                 (self.op.iallocator, len(ial.result),
8031
                                  ial.required_nodes), errors.ECODE_FAULT)
8032
    self.op.pnode = ial.result[0]
8033
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8034
                 self.op.instance_name, self.op.iallocator,
8035
                 utils.CommaJoin(ial.result))
8036
    if ial.required_nodes == 2:
8037
      self.op.snode = ial.result[1]
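    # Illustrative example: for a two-node allocation the allocator might
    # return ial.result == ["node2.example.com", "node5.example.com"], in
    # which case node2 becomes the primary node and node5 the secondary.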
8038

    
8039
  def BuildHooksEnv(self):
8040
    """Build hooks env.
8041

8042
    This runs on master, primary and secondary nodes of the instance.
8043

8044
    """
8045
    env = {
8046
      "ADD_MODE": self.op.mode,
8047
      }
8048
    if self.op.mode == constants.INSTANCE_IMPORT:
8049
      env["SRC_NODE"] = self.op.src_node
8050
      env["SRC_PATH"] = self.op.src_path
8051
      env["SRC_IMAGES"] = self.src_images
8052

    
8053
    env.update(_BuildInstanceHookEnv(
8054
      name=self.op.instance_name,
8055
      primary_node=self.op.pnode,
8056
      secondary_nodes=self.secondaries,
8057
      status=self.op.start,
8058
      os_type=self.op.os_type,
8059
      memory=self.be_full[constants.BE_MEMORY],
8060
      vcpus=self.be_full[constants.BE_VCPUS],
8061
      nics=_NICListToTuple(self, self.nics),
8062
      disk_template=self.op.disk_template,
8063
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8064
             for d in self.disks],
8065
      bep=self.be_full,
8066
      hvp=self.hv_full,
8067
      hypervisor_name=self.op.hypervisor,
8068
      tags=self.op.tags,
8069
    ))
8070

    
8071
    return env
8072

    
8073
  def BuildHooksNodes(self):
8074
    """Build hooks nodes.
8075

8076
    """
8077
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8078
    return nl, nl
8079

    
8080
  def _ReadExportInfo(self):
8081
    """Reads the export information from disk.
8082

8083
    It will override the opcode source node and path with the actual
8084
    information, if these two were not specified before.
8085

8086
    @return: the export information
8087

8088
    """
8089
    assert self.op.mode == constants.INSTANCE_IMPORT
8090

    
8091
    src_node = self.op.src_node
8092
    src_path = self.op.src_path
8093

    
8094
    if src_node is None:
8095
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8096
      exp_list = self.rpc.call_export_list(locked_nodes)
8097
      found = False
8098
      for node in exp_list:
8099
        if exp_list[node].fail_msg:
8100
          continue
8101
        if src_path in exp_list[node].payload:
8102
          found = True
8103
          self.op.src_node = src_node = node
8104
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8105
                                                       src_path)
8106
          break
8107
      if not found:
8108
        raise errors.OpPrereqError("No export found for relative path %s" %
8109
                                    src_path, errors.ECODE_INVAL)
8110

    
8111
    _CheckNodeOnline(self, src_node)
8112
    result = self.rpc.call_export_info(src_node, src_path)
8113
    result.Raise("No export or invalid export found in dir %s" % src_path)
8114

    
8115
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8116
    if not export_info.has_section(constants.INISECT_EXP):
8117
      raise errors.ProgrammerError("Corrupted export config",
8118
                                   errors.ECODE_ENVIRON)
8119

    
8120
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8121
    if (int(ei_version) != constants.EXPORT_VERSION):
8122
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8123
                                 (ei_version, constants.EXPORT_VERSION),
8124
                                 errors.ECODE_ENVIRON)
8125
    return export_info
8126

    
8127
  def _ReadExportParams(self, einfo):
8128
    """Use export parameters as defaults.
8129

8130
    In case the opcode doesn't specify (as in override) some instance
8131
    parameters, then try to use them from the export information, if
8132
    that declares them.
8133

8134
    """
8135
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8136

    
8137
    if self.op.disk_template is None:
8138
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8139
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8140
                                          "disk_template")
8141
      else:
8142
        raise errors.OpPrereqError("No disk template specified and the export"
8143
                                   " is missing the disk_template information",
8144
                                   errors.ECODE_INVAL)
8145

    
8146
    if not self.op.disks:
8147
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8148
        disks = []
8149
        # TODO: import the disk iv_name too
8150
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8151
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8152
          disks.append({constants.IDISK_SIZE: disk_sz})
8153
        self.op.disks = disks
8154
      else:
8155
        raise errors.OpPrereqError("No disk info specified and the export"
8156
                                   " is missing the disk information",
8157
                                   errors.ECODE_INVAL)
8158

    
8159
    if (not self.op.nics and
8160
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8161
      nics = []
8162
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8163
        ndict = {}
8164
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8165
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8166
          ndict[name] = v
8167
        nics.append(ndict)
8168
      self.op.nics = nics
8169

    
8170
    if (self.op.hypervisor is None and
8171
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8172
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8173
    if einfo.has_section(constants.INISECT_HYP):
8174
      # use the export parameters but do not override the ones
8175
      # specified by the user
8176
      for name, value in einfo.items(constants.INISECT_HYP):
8177
        if name not in self.op.hvparams:
8178
          self.op.hvparams[name] = value
8179

    
8180
    if einfo.has_section(constants.INISECT_BEP):
8181
      # use the parameters, without overriding
8182
      for name, value in einfo.items(constants.INISECT_BEP):
8183
        if name not in self.op.beparams:
8184
          self.op.beparams[name] = value
8185
    else:
8186
      # try to read the parameters old style, from the main section
8187
      for name in constants.BES_PARAMETERS:
8188
        if (name not in self.op.beparams and
8189
            einfo.has_option(constants.INISECT_INS, name)):
8190
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8191

    
8192
    if einfo.has_section(constants.INISECT_OSP):
8193
      # use the parameters, without overriding
8194
      for name, value in einfo.items(constants.INISECT_OSP):
8195
        if name not in self.op.osparams:
8196
          self.op.osparams[name] = value
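    # Illustrative example (assuming constants.INISECT_INS refers to the
    # instance section of the export file): an export declaring disk_count=1
    # and disk0_size=2048, combined with an opcode that specifies no disks,
    # results in self.op.disks == [{constants.IDISK_SIZE: 2048}].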
8197

    
8198
  def _RevertToDefaults(self, cluster):
8199
    """Revert the instance parameters to the default values.
8200

8201
    """
8202
    # hvparams
8203
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8204
    for name in self.op.hvparams.keys():
8205
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8206
        del self.op.hvparams[name]
8207
    # beparams
8208
    be_defs = cluster.SimpleFillBE({})
8209
    for name in self.op.beparams.keys():
8210
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8211
        del self.op.beparams[name]
8212
    # nic params
8213
    nic_defs = cluster.SimpleFillNIC({})
8214
    for nic in self.op.nics:
8215
      for name in constants.NICS_PARAMETERS:
8216
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8217
          del nic[name]
8218
    # osparams
8219
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8220
    for name in self.op.osparams.keys():
8221
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8222
        del self.op.osparams[name]
8223

    
8224
  def CheckPrereq(self):
8225
    """Check prerequisites.
8226

8227
    """
8228
    if self.op.mode == constants.INSTANCE_IMPORT:
8229
      export_info = self._ReadExportInfo()
8230
      self._ReadExportParams(export_info)
8231

    
8232
    if (not self.cfg.GetVGName() and
8233
        self.op.disk_template not in constants.DTS_NOT_LVM):
8234
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8235
                                 " instances", errors.ECODE_STATE)
8236

    
8237
    if self.op.hypervisor is None:
8238
      self.op.hypervisor = self.cfg.GetHypervisorType()
8239

    
8240
    cluster = self.cfg.GetClusterInfo()
8241
    enabled_hvs = cluster.enabled_hypervisors
8242
    if self.op.hypervisor not in enabled_hvs:
8243
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8244
                                 " cluster (%s)" % (self.op.hypervisor,
8245
                                  ",".join(enabled_hvs)),
8246
                                 errors.ECODE_STATE)
8247

    
8248
    # Check tag validity
8249
    for tag in self.op.tags:
8250
      objects.TaggableObject.ValidateTag(tag)
8251

    
8252
    # check hypervisor parameter syntax (locally)
8253
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8254
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8255
                                      self.op.hvparams)
8256
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8257
    hv_type.CheckParameterSyntax(filled_hvp)
8258
    self.hv_full = filled_hvp
8259
    # check that we don't specify global parameters on an instance
8260
    _CheckGlobalHvParams(self.op.hvparams)
8261

    
8262
    # fill and remember the beparams dict
8263
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8264
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8265

    
8266
    # build os parameters
8267
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8268

    
8269
    # now that hvp/bep are in final format, let's reset to defaults,
8270
    # if told to do so
8271
    if self.op.identify_defaults:
8272
      self._RevertToDefaults(cluster)
8273

    
8274
    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

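    # The remaining prerequisite checks need RPCs to the nodes: hypervisor and
    # OS parameters, NIC bridges and, if the instance is to be started, free
    # memory on the primary node.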
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      file_storage_dir = utils.PathJoin(get_fsd_fn(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

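    # The instance object is created with admin_up=False; it is only marked as
    # up and actually started at the end of Exec when self.op.start is set.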
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

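    # For adopted disks the data already exists, so the LVs are only renamed to
    # the newly generated names; otherwise brand new disks are created (and
    # removed again if creation fails).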
    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

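    # disk_abort tracks whether the new disks turned out to be unusable
    # (failed wipe or degraded mirrors); in that case they are removed and the
    # instance creation is aborted below.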
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if owned_groups:
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      if owned_groups != groups:
        raise errors.OpExecError("Node groups used by instance '%s' changed"
                                 " since lock was acquired, current list is %r,"
                                 " used to be '%s'" %
                                 (self.op.instance_name,
                                  utils.CommaJoin(groups),
                                  utils.CommaJoin(owned_groups)))

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = \
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
           for node_name in touched_nodes)

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      assert set(owned_locks) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_locks, self.node_secondary_ip.keys()))

      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
      assert list(owned_locks) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_locks) or
              (not self.early_release and not (set(owned_locks) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_locks, nodes))

    return result

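  # Helper methods used by the disk replacement implementations further down.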
  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
9512
    """Replace the secondary node for DRBD 8.
9513

9514
    The algorithm for replace is quite complicated:
9515
      - for all disks of the instance:
9516
        - create new LVs on the new node with same names
9517
        - shutdown the drbd device on the old secondary
9518
        - disconnect the drbd network on the primary
9519
        - create the drbd device on the new secondary
9520
        - network attach the drbd on the primary, using an artifice:
9521
          the drbd code for Attach() will connect to the network if it
9522
          finds a device which is connected to the good local disks but
9523
          not network enabled
9524
      - wait for sync across all devices
9525
      - remove all disks from the old secondary
9526

9527
    Failures are not very well handled.
9528

9529
    """
9530
    steps_total = 6
9531

    
9532
    # Step: check device activation
9533
    self.lu.LogStep(1, steps_total, "Check device existence")
9534
    self._CheckDisksExistence([self.instance.primary_node])
9535
    self._CheckVolumeGroup([self.instance.primary_node])
9536

    
9537
    # Step: check other node consistency
9538
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9539
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9540

    
9541
    # Step: create new storage
9542
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9543
    for idx, dev in enumerate(self.instance.disks):
9544
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9545
                      (self.new_node, idx))
9546
      # we pass force_create=True to force LVM creation
9547
      for new_lv in dev.children:
9548
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9549
                        _GetInstanceInfoText(self.instance), False)
9550

    
9551
    # Step 4: dbrd minors and drbd setups changes
9552
    # after this, we must manually remove the drbd minors on both the
9553
    # error and the success paths
9554
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9555
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
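    # attach failures are only warnings here: the sync and device checks
    # below will surface any disk that remains disconnected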
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

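    # only storage types that list SO_FIX_CONSISTENCY among their valid
    # operations can be repaired; everything else is rejected below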
    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    instances = []
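    # only instances that have one of the given nodes as their secondary
    # are considered; instances without secondaries are not affected here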
    for node in self.op.nodes:
      instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
    if not instances:
      return []

    if self.op.remote_node is not None:
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
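    # from this point on the new size is recorded in the configuration,
    # even if the sync below fails or is skipped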
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

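    # recurse into the child devices so that, e.g., the LVs backing a
    # DRBD disk are reported together with their parent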
    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

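    # Each entry in self.op.disks and self.op.nics is an (op, params) pair,
    # where op is constants.DDM_ADD, constants.DDM_REMOVE or an integer
    # index selecting the existing device to modify.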
    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
    be_old = cluster.FillBE(instance)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode,  msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get('memory_free', None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

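  # dispatch table used by Exec(): maps (current disk template, requested
  # disk template) to the method implementing the conversion; only the
  # combinations listed below are supported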
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
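    # nodes that failed to answer the RPC are mapped to False instead of
    # a list, so callers can tell errors apart from "no exports"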
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

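      # the handshake and the signatures below are all derived from the
      # cluster domain secret, so the receiving side of the export can
      # verify them against the same secret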
      return {
10802
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10803
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10804
                          salt),
10805
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10806
        }
10807

    
10808
    return None
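    # Local exports need no preparation data, hence the None result.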
10809

    
10810

    
10811
class LUBackupExport(LogicalUnit):
10812
  """Export an instance to an image in the cluster.
10813

10814
  """
10815
  HPATH = "instance-export"
10816
  HTYPE = constants.HTYPE_INSTANCE
10817
  REQ_BGL = False
10818

    
10819
  def CheckArguments(self):
10820
    """Check the arguments.
10821

10822
    """
10823
    self.x509_key_name = self.op.x509_key_name
10824
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10825

    
10826
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10827
      if not self.x509_key_name:
10828
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10829
                                   errors.ECODE_INVAL)
10830

    
10831
      if not self.dest_x509_ca_pem:
10832
        raise errors.OpPrereqError("Missing destination X509 CA",
10833
                                   errors.ECODE_INVAL)
10834

    
10835
  def ExpandNames(self):
10836
    self._ExpandAndLockInstance()
10837

    
10838
    # Lock all nodes for local exports
10839
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10840
      # FIXME: lock only instance primary and destination node
10841
      #
10842
      # Sad but true, for now we have to lock all nodes, as we don't know where
10843
      # the previous export might be, and in this LU we search for it and
10844
      # remove it from its current node. In the future we could fix this by:
10845
      #  - making a tasklet to search (share-lock all), then create the
10846
      #    new one, then one to remove, after
10847
      #  - removing the removal operation altogether
10848
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10849

    
10850
  def DeclareLocks(self, level):
10851
    """Last minute lock declaration."""
10852
    # All nodes are locked anyway, so nothing to do here.
10853

    
10854
  def BuildHooksEnv(self):
10855
    """Build hooks env.
10856

10857
    This will run on the master, primary node and target node.
10858

10859
    """
10860
    env = {
10861
      "EXPORT_MODE": self.op.mode,
10862
      "EXPORT_NODE": self.op.target_node,
10863
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10864
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10865
      # TODO: Generic function for boolean env variables
10866
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10867
      }
10868

    
10869
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10870

    
10871
    return env
10872

    
10873
  def BuildHooksNodes(self):
10874
    """Build hooks nodes.
10875

10876
    """
10877
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10878

    
10879
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10880
      nl.append(self.op.target_node)
10881

    
10882
    return (nl, nl)
10883

    
10884
  def CheckPrereq(self):
10885
    """Check prerequisites.
10886

10887
    This checks that the instance and node names are valid.
10888

10889
    """
10890
    instance_name = self.op.instance_name
10891

    
10892
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10893
    assert self.instance is not None, \
10894
          "Cannot retrieve locked instance %s" % self.op.instance_name
10895
    _CheckNodeOnline(self, self.instance.primary_node)
10896

    
10897
    if (self.op.remove_instance and self.instance.admin_up and
10898
        not self.op.shutdown):
10899
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10900
                                 " down before")
10901

    
10902
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10903
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10904
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10905
      assert self.dst_node is not None
10906

    
10907
      _CheckNodeOnline(self, self.dst_node.name)
10908
      _CheckNodeNotDrained(self, self.dst_node.name)
10909

    
10910
      self._cds = None
10911
      self.dest_disk_info = None
10912
      self.dest_x509_ca = None
10913

    
10914
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10915
      self.dst_node = None
10916

    
10917
      if len(self.op.target_node) != len(self.instance.disks):
10918
        raise errors.OpPrereqError(("Received destination information for %s"
10919
                                    " disks, but instance %s has %s disks") %
10920
                                   (len(self.op.target_node), instance_name,
10921
                                    len(self.instance.disks)),
10922
                                   errors.ECODE_INVAL)
10923

    
10924
      cds = _GetClusterDomainSecret()
10925

    
10926
      # Check X509 key name
10927
      try:
10928
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10929
      except (TypeError, ValueError), err:
10930
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10931

    
10932
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10933
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10934
                                   errors.ECODE_INVAL)
10935

    
10936
      # Load and verify CA
10937
      try:
10938
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10939
      except OpenSSL.crypto.Error, err:
10940
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10941
                                   (err, ), errors.ECODE_INVAL)
10942

    
10943
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10944
      if errcode is not None:
10945
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10946
                                   (msg, ), errors.ECODE_INVAL)
10947

    
10948
      self.dest_x509_ca = cert
10949

    
10950
      # Verify target information
10951
      disk_info = []
10952
      for idx, disk_data in enumerate(self.op.target_node):
10953
        try:
10954
          (host, port, magic) = \
10955
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10956
        except errors.GenericError, err:
10957
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10958
                                     (idx, err), errors.ECODE_INVAL)
10959

    
10960
        disk_info.append((host, port, magic))
10961

    
10962
      assert len(disk_info) == len(self.op.target_node)
10963
      self.dest_disk_info = disk_info
10964

    
10965
    else:
10966
      raise errors.ProgrammerError("Unhandled export mode %r" %
10967
                                   self.op.mode)
10968

    
10969
    # instance disk type verification
10970
    # TODO: Implement export support for file-based disks
10971
    for disk in self.instance.disks:
10972
      if disk.dev_type == constants.LD_FILE:
10973
        raise errors.OpPrereqError("Export not supported for instances with"
10974
                                   " file-based disks", errors.ECODE_INVAL)
10975

    
10976
  def _CleanupExports(self, feedback_fn):
10977
    """Removes exports of current instance from all other nodes.
10978

10979
    If an instance in a cluster with nodes A..D was exported to node C, its
10980
    exports will be removed from the nodes A, B and D.
10981

10982
    """
10983
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10984

    
10985
    nodelist = self.cfg.GetNodeList()
10986
    nodelist.remove(self.dst_node.name)
10987

    
10988
    # on one-node clusters nodelist will be empty after the removal;
    # if we proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
10991
    iname = self.instance.name
10992
    if nodelist:
10993
      feedback_fn("Removing old exports for instance %s" % iname)
10994
      exportlist = self.rpc.call_export_list(nodelist)
10995
      for node in exportlist:
10996
        if exportlist[node].fail_msg:
10997
          continue
10998
        if iname in exportlist[node].payload:
10999
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11000
          if msg:
11001
            self.LogWarning("Could not remove older export for instance %s"
11002
                            " on node %s: %s", iname, node, msg)
11003

    
11004
  def Exec(self, feedback_fn):
11005
    """Export an instance to an image in the cluster.
11006

11007
    """
11008
    assert self.op.mode in constants.EXPORT_MODES
11009

    
11010
    instance = self.instance
11011
    src_node = instance.primary_node
11012

    
11013
    if self.op.shutdown:
11014
      # shutdown the instance, but not the disks
11015
      feedback_fn("Shutting down instance %s" % instance.name)
11016
      result = self.rpc.call_instance_shutdown(src_node, instance,
11017
                                               self.op.shutdown_timeout)
11018
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11019
      result.Raise("Could not shutdown instance %s on"
11020
                   " node %s" % (instance.name, src_node))
11021

    
11022
    # set the disks ID correctly since call_instance_start needs the
11023
    # correct drbd minor to create the symlinks
11024
    for disk in instance.disks:
11025
      self.cfg.SetDiskID(disk, src_node)
11026

    
11027
    activate_disks = (not instance.admin_up)
11028

    
11029
    if activate_disks:
11030
      # Activate the instance disks if we're exporting a stopped instance
11031
      feedback_fn("Activating disks for %s" % instance.name)
11032
      _StartInstanceDisks(self, instance, None)
11033

    
11034
    try:
11035
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11036
                                                     instance)
11037

    
11038
      helper.CreateSnapshots()
11039
      try:
11040
        if (self.op.shutdown and instance.admin_up and
11041
            not self.op.remove_instance):
11042
          assert not activate_disks
11043
          feedback_fn("Starting instance %s" % instance.name)
11044
          result = self.rpc.call_instance_start(src_node, instance, None, None)
11045
          msg = result.fail_msg
11046
          if msg:
11047
            feedback_fn("Failed to start instance: %s" % msg)
11048
            _ShutdownInstanceDisks(self, instance)
11049
            raise errors.OpExecError("Could not start instance: %s" % msg)
11050

    
11051
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11052
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11053
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11054
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11055
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11056

    
11057
          (key_name, _, _) = self.x509_key_name
11058

    
11059
          dest_ca_pem = \
11060
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11061
                                            self.dest_x509_ca)
11062

    
11063
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11064
                                                     key_name, dest_ca_pem,
11065
                                                     timeouts)
11066
      finally:
11067
        helper.Cleanup()
11068

    
11069
      # Check for backwards compatibility
11070
      assert len(dresults) == len(instance.disks)
11071
      assert compat.all(isinstance(i, bool) for i in dresults), \
11072
             "Not all results are boolean: %r" % dresults
11073

    
11074
    finally:
11075
      if activate_disks:
11076
        feedback_fn("Deactivating disks for %s" % instance.name)
11077
        _ShutdownInstanceDisks(self, instance)
11078

    
11079
    if not (compat.all(dresults) and fin_resu):
11080
      failures = []
11081
      if not fin_resu:
11082
        failures.append("export finalization")
11083
      if not compat.all(dresults):
11084
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11085
                               if not dsk)
11086
        failures.append("disk export: disk(s) %s" % fdsk)
11087

    
11088
      raise errors.OpExecError("Export failed, errors in %s" %
11089
                               utils.CommaJoin(failures))
11090

    
11091
    # At this point, the export was successful, we can cleanup/finish
11092

    
11093
    # Remove instance if requested
11094
    if self.op.remove_instance:
11095
      feedback_fn("Removing instance %s" % instance.name)
11096
      _RemoveInstance(self, feedback_fn, instance,
11097
                      self.op.ignore_remove_failures)
11098

    
11099
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11100
      self._CleanupExports(feedback_fn)
11101

    
11102
    return fin_resu, dresults
11103

    
11104

    
11105
class LUBackupRemove(NoHooksLU):
11106
  """Remove exports related to the named instance.
11107

11108
  """
11109
  REQ_BGL = False
11110

    
11111
  def ExpandNames(self):
11112
    self.needed_locks = {}
11113
    # We need all nodes to be locked in order for RemoveExport to work, but we
11114
    # don't need to lock the instance itself, as nothing will happen to it (and
11115
    # we can remove exports also for a removed instance)
11116
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11117

    
11118
  def Exec(self, feedback_fn):
11119
    """Remove any export.
11120

11121
    """
11122
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11123
    # If the instance was not found we'll try with the name that was passed in.
11124
    # This will only work if it was an FQDN, though.
11125
    fqdn_warn = False
11126
    if not instance_name:
11127
      fqdn_warn = True
11128
      instance_name = self.op.instance_name
11129

    
11130
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11131
    exportlist = self.rpc.call_export_list(locked_nodes)
11132
    found = False
11133
    for node in exportlist:
11134
      msg = exportlist[node].fail_msg
11135
      if msg:
11136
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11137
        continue
11138
      if instance_name in exportlist[node].payload:
11139
        found = True
11140
        result = self.rpc.call_export_remove(node, instance_name)
11141
        msg = result.fail_msg
11142
        if msg:
11143
          logging.error("Could not remove export for instance %s"
11144
                        " on node %s: %s", instance_name, node, msg)
11145

    
11146
    if fqdn_warn and not found:
11147
      feedback_fn("Export not found. If trying to remove an export belonging"
11148
                  " to a deleted instance please use its Fully Qualified"
11149
                  " Domain Name.")
11150

    
11151

    
11152
class LUGroupAdd(LogicalUnit):
11153
  """Logical unit for creating node groups.
11154

11155
  """
11156
  HPATH = "group-add"
11157
  HTYPE = constants.HTYPE_GROUP
11158
  REQ_BGL = False
11159

    
11160
  def ExpandNames(self):
11161
    # We need the new group's UUID here so that we can create and acquire the
11162
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11163
    # that it should not check whether the UUID exists in the configuration.
11164
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11165
    self.needed_locks = {}
11166
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11167

    
11168
  def CheckPrereq(self):
11169
    """Check prerequisites.
11170

11171
    This checks that the given group name is not an existing node group
11172
    already.
11173

11174
    """
11175
    try:
11176
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11177
    except errors.OpPrereqError:
11178
      pass
11179
    else:
11180
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11181
                                 " node group (UUID: %s)" %
11182
                                 (self.op.group_name, existing_uuid),
11183
                                 errors.ECODE_EXISTS)
11184

    
11185
    if self.op.ndparams:
11186
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11187

    
11188
  def BuildHooksEnv(self):
11189
    """Build hooks env.
11190

11191
    """
11192
    return {
11193
      "GROUP_NAME": self.op.group_name,
11194
      }
11195

    
11196
  def BuildHooksNodes(self):
11197
    """Build hooks nodes.
11198

11199
    """
11200
    mn = self.cfg.GetMasterNode()
11201
    return ([mn], [mn])
11202

    
11203
  def Exec(self, feedback_fn):
11204
    """Add the node group to the cluster.
11205

11206
    """
11207
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11208
                                  uuid=self.group_uuid,
11209
                                  alloc_policy=self.op.alloc_policy,
11210
                                  ndparams=self.op.ndparams)
11211

    
11212
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11213
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11214

    
11215

    
11216
class LUGroupAssignNodes(NoHooksLU):
11217
  """Logical unit for assigning nodes to groups.
11218

11219
  """
11220
  REQ_BGL = False
11221

    
11222
  def ExpandNames(self):
11223
    # These raise errors.OpPrereqError on their own:
11224
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11225
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11226

    
11227
    # We want to lock all the affected nodes and groups. We have readily
11228
    # available the list of nodes, and the *destination* group. To gather the
11229
    # list of "source" groups, we need to fetch node information later on.
11230
    self.needed_locks = {
11231
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11232
      locking.LEVEL_NODE: self.op.nodes,
11233
      }
11234

    
11235
  def DeclareLocks(self, level):
11236
    if level == locking.LEVEL_NODEGROUP:
11237
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11238

    
11239
      # Try to get all affected nodes' groups without having the group or node
11240
      # lock yet. Needs verification later in the code flow.
11241
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11242

    
11243
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11244

    
11245
  def CheckPrereq(self):
11246
    """Check prerequisites.
11247

11248
    """
11249
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11250
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11251
            frozenset(self.op.nodes))
11252

    
11253
    expected_locks = (set([self.group_uuid]) |
11254
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11255
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11256
    if actual_locks != expected_locks:
11257
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11258
                               " current groups are '%s', used to be '%s'" %
11259
                               (utils.CommaJoin(expected_locks),
11260
                                utils.CommaJoin(actual_locks)))
11261

    
11262
    self.node_data = self.cfg.GetAllNodesInfo()
11263
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11264
    instance_data = self.cfg.GetAllInstancesInfo()
11265

    
11266
    if self.group is None:
11267
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11268
                               (self.op.group_name, self.group_uuid))
11269

    
11270
    (new_splits, previous_splits) = \
11271
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11272
                                             for node in self.op.nodes],
11273
                                            self.node_data, instance_data)
11274

    
11275
    if new_splits:
11276
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11277

    
11278
      if not self.op.force:
11279
        raise errors.OpExecError("The following instances get split by this"
11280
                                 " change and --force was not given: %s" %
11281
                                 fmt_new_splits)
11282
      else:
11283
        self.LogWarning("This operation will split the following instances: %s",
11284
                        fmt_new_splits)
11285

    
11286
        if previous_splits:
11287
          self.LogWarning("In addition, these already-split instances continue"
11288
                          " to be split across groups: %s",
11289
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11290

    
11291
  def Exec(self, feedback_fn):
11292
    """Assign nodes to a new group.
11293

11294
    """
11295
    for node in self.op.nodes:
11296
      self.node_data[node].group = self.group_uuid
11297

    
11298
    # FIXME: Depends on side-effects of modifying the result of
11299
    # C{cfg.GetAllNodesInfo}
11300

    
11301
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11302

    
11303
  @staticmethod
11304
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11305
    """Check for split instances after a node assignment.
11306

11307
    This method considers a series of node assignments as an atomic operation,
11308
    and returns information about split instances after applying the set of
11309
    changes.
11310

11311
    In particular, it returns information about newly split instances, and
11312
    instances that were already split, and remain so after the change.
11313

11314
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11315
    considered.
11316

11317
    @type changes: list of (node_name, new_group_uuid) pairs.
11318
    @param changes: list of node assignments to consider.
11319
    @param node_data: a dict with data for all nodes
11320
    @param instance_data: a dict with all instances to consider
11321
    @rtype: a two-tuple
11322
    @return: a list of instances that were previously whole and become split as
      a consequence of this change, and a list of instances that were already
      split and that this change does not fix.
11325

11326
    """
11327
    changed_nodes = dict((node, group) for node, group in changes
11328
                         if node_data[node].group != group)
11329

    
11330
    all_split_instances = set()
11331
    previously_split_instances = set()
11332

    
11333
    def InstanceNodes(instance):
11334
      return [instance.primary_node] + list(instance.secondary_nodes)
11335

    
11336
    for inst in instance_data.values():
11337
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11338
        continue
11339

    
11340
      instance_nodes = InstanceNodes(inst)
11341

    
11342
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11343
        previously_split_instances.add(inst.name)
11344

    
11345
      if len(set(changed_nodes.get(node, node_data[node].group)
11346
                 for node in instance_nodes)) > 1:
11347
        all_split_instances.add(inst.name)
11348

    
11349
    return (list(all_split_instances - previously_split_instances),
11350
            list(previously_split_instances & all_split_instances))
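    # Illustrative example (hypothetical cluster): if a DRBD instance spans
    # (node1, node2) and only node2 is moved to another group, the instance
    # shows up in the first list; an instance whose nodes already were in
    # different groups, and still are, shows up in the second list.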
11351

    
11352

    
11353
class _GroupQuery(_QueryBase):
11354
  FIELDS = query.GROUP_FIELDS
11355

    
11356
  def ExpandNames(self, lu):
11357
    lu.needed_locks = {}
11358

    
11359
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11360
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11361

    
11362
    if not self.names:
11363
      self.wanted = [name_to_uuid[name]
11364
                     for name in utils.NiceSort(name_to_uuid.keys())]
11365
    else:
11366
      # Accept names to be either names or UUIDs.
11367
      missing = []
11368
      self.wanted = []
11369
      all_uuid = frozenset(self._all_groups.keys())
11370

    
11371
      for name in self.names:
11372
        if name in all_uuid:
11373
          self.wanted.append(name)
11374
        elif name in name_to_uuid:
11375
          self.wanted.append(name_to_uuid[name])
11376
        else:
11377
          missing.append(name)
11378

    
11379
      if missing:
11380
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11381
                                   utils.CommaJoin(missing),
11382
                                   errors.ECODE_NOENT)
11383

    
11384
  def DeclareLocks(self, lu, level):
11385
    pass
11386

    
11387
  def _GetQueryData(self, lu):
11388
    """Computes the list of node groups and their attributes.
11389

11390
    """
11391
    do_nodes = query.GQ_NODE in self.requested_data
11392
    do_instances = query.GQ_INST in self.requested_data
11393

    
11394
    group_to_nodes = None
11395
    group_to_instances = None
11396

    
11397
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11398
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11399
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11400
    # instance->node. Hence, we will need to process nodes even if we only need
11401
    # instance information.
11402
    if do_nodes or do_instances:
11403
      all_nodes = lu.cfg.GetAllNodesInfo()
11404
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11405
      node_to_group = {}
11406

    
11407
      for node in all_nodes.values():
11408
        if node.group in group_to_nodes:
11409
          group_to_nodes[node.group].append(node.name)
11410
          node_to_group[node.name] = node.group
11411

    
11412
      if do_instances:
11413
        all_instances = lu.cfg.GetAllInstancesInfo()
11414
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11415

    
11416
        for instance in all_instances.values():
11417
          node = instance.primary_node
11418
          if node in node_to_group:
11419
            group_to_instances[node_to_group[node]].append(instance.name)
11420

    
11421
        if not do_nodes:
11422
          # Do not pass on node information if it was not requested.
11423
          group_to_nodes = None
11424

    
11425
    return query.GroupQueryData([self._all_groups[uuid]
11426
                                 for uuid in self.wanted],
11427
                                group_to_nodes, group_to_instances)
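    # Shape of the two mappings: group_to_nodes maps a group UUID to the names
    # of its member nodes, and group_to_instances maps a group UUID to the
    # names of instances whose primary node is in that group; either may be
    # None if the corresponding data was not requested.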


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
11448
  """Modifies the parameters of a node group.
11449

11450
  """
11451
  HPATH = "group-modify"
11452
  HTYPE = constants.HTYPE_GROUP
11453
  REQ_BGL = False
11454

    
11455
  def CheckArguments(self):
11456
    all_changes = [
11457
      self.op.ndparams,
11458
      self.op.alloc_policy,
11459
      ]
11460

    
11461
    if all_changes.count(None) == len(all_changes):
11462
      raise errors.OpPrereqError("Please pass at least one modification",
11463
                                 errors.ECODE_INVAL)
11464

    
11465
  def ExpandNames(self):
11466
    # This raises errors.OpPrereqError on its own:
11467
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11468

    
11469
    self.needed_locks = {
11470
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11471
      }
11472

    
11473
  def CheckPrereq(self):
11474
    """Check prerequisites.
11475

11476
    """
11477
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11478

    
11479
    if self.group is None:
11480
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11481
                               (self.op.group_name, self.group_uuid))
11482

    
11483
    if self.op.ndparams:
11484
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11485
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11486
      self.new_ndparams = new_ndparams
11487

    
11488
  def BuildHooksEnv(self):
11489
    """Build hooks env.
11490

11491
    """
11492
    return {
11493
      "GROUP_NAME": self.op.group_name,
11494
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11495
      }
11496

    
11497
  def BuildHooksNodes(self):
11498
    """Build hooks nodes.
11499

11500
    """
11501
    mn = self.cfg.GetMasterNode()
11502
    return ([mn], [mn])
11503

    
11504
  def Exec(self, feedback_fn):
11505
    """Modifies the node group.
11506

11507
    """
11508
    result = []
11509

    
11510
    if self.op.ndparams:
11511
      self.group.ndparams = self.new_ndparams
11512
      result.append(("ndparams", str(self.group.ndparams)))
11513

    
11514
    if self.op.alloc_policy:
11515
      self.group.alloc_policy = self.op.alloc_policy
11516

    
11517
    self.cfg.Update(self.group, feedback_fn)
11518
    return result
11519

    
11520

    
11521

    
11522
class LUGroupRemove(LogicalUnit):
11523
  HPATH = "group-remove"
11524
  HTYPE = constants.HTYPE_GROUP
11525
  REQ_BGL = False
11526

    
11527
  def ExpandNames(self):
11528
    # This raises errors.OpPrereqError on its own:
11529
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11530
    self.needed_locks = {
11531
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11532
      }
11533

    
11534
  def CheckPrereq(self):
11535
    """Check prerequisites.
11536

11537
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
11540

11541
    """
11542
    # Verify that the group is empty.
11543
    group_nodes = [node.name
11544
                   for node in self.cfg.GetAllNodesInfo().values()
11545
                   if node.group == self.group_uuid]
11546

    
11547
    if group_nodes:
11548
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11549
                                 " nodes: %s" %
11550
                                 (self.op.group_name,
11551
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11552
                                 errors.ECODE_STATE)
11553

    
11554
    # Verify the cluster would not be left group-less.
11555
    if len(self.cfg.GetNodeGroupList()) == 1:
11556
      raise errors.OpPrereqError("Group '%s' is the only group,"
11557
                                 " cannot be removed" %
11558
                                 self.op.group_name,
11559
                                 errors.ECODE_STATE)
11560

    
11561
  def BuildHooksEnv(self):
11562
    """Build hooks env.
11563

11564
    """
11565
    return {
11566
      "GROUP_NAME": self.op.group_name,
11567
      }
11568

    
11569
  def BuildHooksNodes(self):
11570
    """Build hooks nodes.
11571

11572
    """
11573
    mn = self.cfg.GetMasterNode()
11574
    return ([mn], [mn])
11575

    
11576
  def Exec(self, feedback_fn):
11577
    """Remove the node group.
11578

11579
    """
11580
    try:
11581
      self.cfg.RemoveNodeGroup(self.group_uuid)
11582
    except errors.ConfigurationError:
11583
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11584
                               (self.op.group_name, self.group_uuid))
11585

    
11586
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11587

    
11588

    
11589
class LUGroupRename(LogicalUnit):
11590
  HPATH = "group-rename"
11591
  HTYPE = constants.HTYPE_GROUP
11592
  REQ_BGL = False
11593

    
11594
  def ExpandNames(self):
11595
    # This raises errors.OpPrereqError on its own:
11596
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11597

    
11598
    self.needed_locks = {
11599
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11600
      }
11601

    
11602
  def CheckPrereq(self):
11603
    """Check prerequisites.
11604

11605
    Ensures requested new name is not yet used.
11606

11607
    """
11608
    try:
11609
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11610
    except errors.OpPrereqError:
11611
      pass
11612
    else:
11613
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11614
                                 " node group (UUID: %s)" %
11615
                                 (self.op.new_name, new_name_uuid),
11616
                                 errors.ECODE_EXISTS)
11617

    
11618
  def BuildHooksEnv(self):
11619
    """Build hooks env.
11620

11621
    """
11622
    return {
11623
      "OLD_NAME": self.op.group_name,
11624
      "NEW_NAME": self.op.new_name,
11625
      }
11626

    
11627
  def BuildHooksNodes(self):
11628
    """Build hooks nodes.
11629

11630
    """
11631
    mn = self.cfg.GetMasterNode()
11632

    
11633
    all_nodes = self.cfg.GetAllNodesInfo()
11634
    all_nodes.pop(mn, None)
11635

    
11636
    run_nodes = [mn]
11637
    run_nodes.extend(node.name for node in all_nodes.values()
11638
                     if node.group == self.group_uuid)
11639

    
11640
    return (run_nodes, run_nodes)
11641

    
11642
  def Exec(self, feedback_fn):
11643
    """Rename the node group.
11644

11645
    """
11646
    group = self.cfg.GetNodeGroup(self.group_uuid)
11647

    
11648
    if group is None:
11649
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11650
                               (self.op.group_name, self.group_uuid))
11651

    
11652
    group.name = self.op.new_name
11653
    self.cfg.Update(group, feedback_fn)
11654

    
11655
    return self.op.new_name
11656

    
11657

    
11658
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11659
  """Generic tags LU.
11660

11661
  This is an abstract class which is the parent of all the other tags LUs.
11662

11663
  """
11664
  def ExpandNames(self):
11665
    self.group_uuid = None
11666
    self.needed_locks = {}
11667
    if self.op.kind == constants.TAG_NODE:
11668
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11669
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
11670
    elif self.op.kind == constants.TAG_INSTANCE:
11671
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11672
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11673
    elif self.op.kind == constants.TAG_NODEGROUP:
11674
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11675

    
11676
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11677
    # not possible to acquire the BGL based on opcode parameters)
11678

    
11679
  def CheckPrereq(self):
11680
    """Check prerequisites.
11681

11682
    """
11683
    if self.op.kind == constants.TAG_CLUSTER:
11684
      self.target = self.cfg.GetClusterInfo()
11685
    elif self.op.kind == constants.TAG_NODE:
11686
      self.target = self.cfg.GetNodeInfo(self.op.name)
11687
    elif self.op.kind == constants.TAG_INSTANCE:
11688
      self.target = self.cfg.GetInstanceInfo(self.op.name)
11689
    elif self.op.kind == constants.TAG_NODEGROUP:
11690
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
11691
    else:
11692
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11693
                                 str(self.op.kind), errors.ECODE_INVAL)
11694

    
11695

    
11696
class LUTagsGet(TagsLU):
11697
  """Returns the tags of a given object.
11698

11699
  """
11700
  REQ_BGL = False
11701

    
11702
  def ExpandNames(self):
11703
    TagsLU.ExpandNames(self)
11704

    
11705
    # Share locks as this is only a read operation
11706
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11707

    
11708
  def Exec(self, feedback_fn):
11709
    """Returns the tag list.
11710

11711
    """
11712
    return list(self.target.GetTags())
11713

    
11714

    
11715
class LUTagsSearch(NoHooksLU):
11716
  """Searches the tags for a given pattern.
11717

11718
  """
11719
  REQ_BGL = False
11720

    
11721
  def ExpandNames(self):
11722
    self.needed_locks = {}
11723

    
11724
  def CheckPrereq(self):
11725
    """Check prerequisites.
11726

11727
    This checks the pattern passed for validity by compiling it.
11728

11729
    """
11730
    try:
11731
      self.re = re.compile(self.op.pattern)
11732
    except re.error, err:
11733
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11734
                                 (self.op.pattern, err), errors.ECODE_INVAL)
11735

    
11736
  def Exec(self, feedback_fn):
11737
    """Returns the tag list.
11738

11739
    """
11740
    cfg = self.cfg
11741
    tgts = [("/cluster", cfg.GetClusterInfo())]
11742
    ilist = cfg.GetAllInstancesInfo().values()
11743
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11744
    nlist = cfg.GetAllNodesInfo().values()
11745
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11746
    tgts.extend(("/nodegroup/%s" % n.name, n)
11747
                for n in cfg.GetAllNodeGroupsInfo().values())
11748
    results = []
11749
    for path, target in tgts:
11750
      for tag in target.GetTags():
11751
        if self.re.search(tag):
11752
          results.append((path, tag))
11753
    return results
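    # Example result, with made-up names and tags:
    #   [("/cluster", "staging"), ("/instances/inst1.example.com", "web")]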
11754

    
11755

    
11756
class LUTagsSet(TagsLU):
11757
  """Sets a tag on a given object.
11758

11759
  """
11760
  REQ_BGL = False
11761

    
11762
  def CheckPrereq(self):
11763
    """Check prerequisites.
11764

11765
    This checks the type and length of the tag name and value.
11766

11767
    """
11768
    TagsLU.CheckPrereq(self)
11769
    for tag in self.op.tags:
11770
      objects.TaggableObject.ValidateTag(tag)
11771

    
11772
  def Exec(self, feedback_fn):
11773
    """Sets the tag.
11774

11775
    """
11776
    try:
11777
      for tag in self.op.tags:
11778
        self.target.AddTag(tag)
11779
    except errors.TagError, err:
11780
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
11781
    self.cfg.Update(self.target, feedback_fn)
11782

    
11783

    
11784
class LUTagsDel(TagsLU):
11785
  """Delete a list of tags from a given object.
11786

11787
  """
11788
  REQ_BGL = False
11789

    
11790
  def CheckPrereq(self):
11791
    """Check prerequisites.
11792

11793
    This checks that we have the given tag.
11794

11795
    """
11796
    TagsLU.CheckPrereq(self)
11797
    for tag in self.op.tags:
11798
      objects.TaggableObject.ValidateTag(tag)
11799
    del_tags = frozenset(self.op.tags)
11800
    cur_tags = self.target.GetTags()
11801

    
11802
    diff_tags = del_tags - cur_tags
11803
    if diff_tags:
11804
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
11805
      raise errors.OpPrereqError("Tag(s) %s not found" %
11806
                                 (utils.CommaJoin(diff_names), ),
11807
                                 errors.ECODE_NOENT)
11808

    
11809
  def Exec(self, feedback_fn):
11810
    """Remove the tag from the object.
11811

11812
    """
11813
    for tag in self.op.tags:
11814
      self.target.RemoveTag(tag)
11815
    self.cfg.Update(self.target, feedback_fn)
11816

    
11817

    
11818
class LUTestDelay(NoHooksLU):
11819
  """Sleep for a specified amount of time.
11820

11821
  This LU sleeps on the master and/or nodes for a specified amount of
11822
  time.
11823

11824
  """
11825
  REQ_BGL = False
11826

    
11827
  def ExpandNames(self):
11828
    """Expand names and set required locks.
11829

11830
    This expands the node list, if any.
11831

11832
    """
11833
    self.needed_locks = {}
11834
    if self.op.on_nodes:
11835
      # _GetWantedNodes can be used here, but is not always appropriate to use
11836
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11837
      # more information.
11838
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11839
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11840

    
11841
  def _TestDelay(self):
11842
    """Do the actual sleep.
11843

11844
    """
11845
    if self.op.on_master:
11846
      if not utils.TestDelay(self.op.duration):
11847
        raise errors.OpExecError("Error during master delay test")
11848
    if self.op.on_nodes:
11849
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11850
      for node, node_result in result.items():
11851
        node_result.Raise("Failure during rpc call to node %s" % node)
11852

    
11853
  def Exec(self, feedback_fn):
11854
    """Execute the test delay opcode, with the wanted repetitions.
11855

11856
    """
11857
    if self.op.repeat == 0:
11858
      self._TestDelay()
11859
    else:
11860
      top_value = self.op.repeat - 1
11861
      for i in range(self.op.repeat):
11862
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11863
        self._TestDelay()
11864

    
11865

    
11866
class LUTestJqueue(NoHooksLU):
11867
  """Utility LU to test some aspects of the job queue.
11868

11869
  """
11870
  REQ_BGL = False
11871

    
11872
  # Must be lower than default timeout for WaitForJobChange to see whether it
11873
  # notices changed jobs
11874
  _CLIENT_CONNECT_TIMEOUT = 20.0
11875
  _CLIENT_CONFIRM_TIMEOUT = 60.0
11876

    
11877
  @classmethod
11878
  def _NotifyUsingSocket(cls, cb, errcls):
11879
    """Opens a Unix socket and waits for another program to connect.
11880

11881
    @type cb: callable
11882
    @param cb: Callback to send socket name to client
11883
    @type errcls: class
11884
    @param errcls: Exception class to use for errors
11885

11886
    """
11887
    # Using a temporary directory as there's no easy way to create temporary
11888
    # sockets without writing a custom loop around tempfile.mktemp and
11889
    # socket.bind
11890
    tmpdir = tempfile.mkdtemp()
11891
    try:
11892
      tmpsock = utils.PathJoin(tmpdir, "sock")
11893

    
11894
      logging.debug("Creating temporary socket at %s", tmpsock)
11895
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11896
      try:
11897
        sock.bind(tmpsock)
11898
        sock.listen(1)
11899

    
11900
        # Send details to client
11901
        cb(tmpsock)
11902

    
11903
        # Wait for client to connect before continuing
11904
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11905
        try:
11906
          (conn, _) = sock.accept()
11907
        except socket.error, err:
11908
          raise errcls("Client didn't connect in time (%s)" % err)
11909
      finally:
11910
        sock.close()
11911
    finally:
11912
      # Remove as soon as client is connected
11913
      shutil.rmtree(tmpdir)
11914

    
11915
    # Wait for client to close
11916
    try:
11917
      try:
11918
        # pylint: disable-msg=E1101
11919
        # Instance of '_socketobject' has no ... member
11920
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11921
        conn.recv(1)
11922
      except socket.error, err:
11923
        raise errcls("Client failed to confirm notification (%s)" % err)
11924
    finally:
11925
      conn.close()
11926

    
11927
  def _SendNotification(self, test, arg, sockname):
11928
    """Sends a notification to the client.
11929

11930
    @type test: string
11931
    @param test: Test name
11932
    @param arg: Test argument (depends on test)
11933
    @type sockname: string
11934
    @param sockname: Socket path
11935

11936
    """
11937
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11938

    
11939
  def _Notify(self, prereq, test, arg):
11940
    """Notifies the client of a test.
11941

11942
    @type prereq: bool
11943
    @param prereq: Whether this is a prereq-phase test
11944
    @type test: string
11945
    @param test: Test name
11946
    @param arg: Test argument (depends on test)
11947

11948
    """
11949
    if prereq:
11950
      errcls = errors.OpPrereqError
11951
    else:
11952
      errcls = errors.OpExecError
11953

    
11954
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11955
                                                  test, arg),
11956
                                   errcls)
11957

    
11958
  def CheckArguments(self):
11959
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11960
    self.expandnames_calls = 0
11961

    
11962
  def ExpandNames(self):
11963
    checkargs_calls = getattr(self, "checkargs_calls", 0)
11964
    if checkargs_calls < 1:
11965
      raise errors.ProgrammerError("CheckArguments was not called")
11966

    
11967
    self.expandnames_calls += 1
11968

    
11969
    if self.op.notify_waitlock:
11970
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
11971

    
11972
    self.LogInfo("Expanding names")
11973

    
11974
    # Get lock on master node (just to get a lock, not for a particular reason)
11975
    self.needed_locks = {
11976
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11977
      }
11978

    
11979
  def Exec(self, feedback_fn):
11980
    if self.expandnames_calls < 1:
11981
      raise errors.ProgrammerError("ExpandNames was not called")
11982

    
11983
    if self.op.notify_exec:
11984
      self._Notify(False, constants.JQT_EXEC, None)
11985

    
11986
    self.LogInfo("Executing")
11987

    
11988
    if self.op.log_messages:
11989
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11990
      for idx, msg in enumerate(self.op.log_messages):
11991
        self.LogInfo("Sending log message %s", idx + 1)
11992
        feedback_fn(constants.JQT_MSGPREFIX + msg)
11993
        # Report how many test messages have been sent
11994
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11995

    
11996
    if self.op.fail:
11997
      raise errors.OpExecError("Opcode failure was requested")
11998

    
11999
    return True
12000

    
12001

    
12002
class IAllocator(object):
12003
  """IAllocator framework.
12004

12005
  An IAllocator instance has four sets of attributes:
12006
    - cfg that is needed to query the cluster
12007
    - input data (all members of the _KEYS class attribute are required)
12008
    - four buffer attributes (in|out_data|text), that represent the
12009
      input (to the external script) in text and data structure format,
12010
      and the output from it, again in two formats
12011
    - the result variables from the script (success, info, nodes) for
12012
      easy usage
12013

12014
  """
12015
  # pylint: disable-msg=R0902
12016
  # lots of instance attributes
12017

    
12018
  def __init__(self, cfg, rpc, mode, **kwargs):
12019
    self.cfg = cfg
12020
    self.rpc = rpc
12021
    # init buffer variables
12022
    self.in_text = self.out_text = self.in_data = self.out_data = None
12023
    # init all input fields so that pylint is happy
12024
    self.mode = mode
12025
    self.memory = self.disks = self.disk_template = None
12026
    self.os = self.tags = self.nics = self.vcpus = None
12027
    self.hypervisor = None
12028
    self.relocate_from = None
12029
    self.name = None
12030
    self.evac_nodes = None
12031
    self.instances = None
12032
    self.reloc_mode = None
12033
    self.target_groups = []
12034
    # computed fields
12035
    self.required_nodes = None
12036
    # init result fields
12037
    self.success = self.info = self.result = None
12038

    
12039
    try:
12040
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12041
    except KeyError:
12042
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12043
                                   " IAllocator" % self.mode)
12044

    
12045
    keyset = [n for (n, _) in keydata]
12046

    
12047
    for key in kwargs:
12048
      if key not in keyset:
12049
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12050
                                     " IAllocator" % key)
12051
      setattr(self, key, kwargs[key])
12052

    
12053
    for key in keyset:
12054
      if key not in kwargs:
12055
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12056
                                     " IAllocator" % key)
12057
    self._BuildInputData(compat.partial(fn, self), keydata)
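    # Note: self._MODE_DATA (defined elsewhere in this class) is assumed to map
    # each mode to a (request-building method, [(key name, type), ...],
    # result-check function) triple, as implied by the unpacking above.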
12058

    
12059
  def _ComputeClusterData(self):
12060
    """Compute the generic allocator input data.
12061

12062
    This is the data that is independent of the actual operation.
12063

12064
    """
12065
    cfg = self.cfg
12066
    cluster_info = cfg.GetClusterInfo()
12067
    # cluster data
12068
    data = {
12069
      "version": constants.IALLOCATOR_VERSION,
12070
      "cluster_name": cfg.GetClusterName(),
12071
      "cluster_tags": list(cluster_info.GetTags()),
12072
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12073
      # we don't have job IDs
12074
      }
12075
    ninfo = cfg.GetAllNodesInfo()
12076
    iinfo = cfg.GetAllInstancesInfo().values()
12077
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12078

    
12079
    # node data
12080
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
12081

    
12082
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12083
      hypervisor_name = self.hypervisor
12084
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12085
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12086
    elif self.mode in (constants.IALLOCATOR_MODE_MEVAC,
12087
                       constants.IALLOCATOR_MODE_MRELOC):
12088
      hypervisor_name = cluster_info.enabled_hypervisors[0]
12089

    
12090
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12091
                                        hypervisor_name)
12092
    node_iinfo = \
12093
      self.rpc.call_all_instances_info(node_list,
12094
                                       cluster_info.enabled_hypervisors)
12095

    
12096
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12097

    
12098
    config_ndata = self._ComputeBasicNodeData(ninfo)
12099
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12100
                                                 i_list, config_ndata)
12101
    assert len(data["nodes"]) == len(ninfo), \
12102
        "Incomplete node data computed"
12103

    
12104
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12105

    
12106
    self.in_data = data
12107

    
12108
  @staticmethod
12109
  def _ComputeNodeGroupData(cfg):
12110
    """Compute node groups data.
12111

12112
    """
12113
    ng = dict((guuid, {
12114
      "name": gdata.name,
12115
      "alloc_policy": gdata.alloc_policy,
12116
      })
12117
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12118

    
12119
    return ng
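    # Illustrative result, keyed by group UUID (values made up):
    #   {"f4e06e0f-...": {"name": "default", "alloc_policy": "preferred"}}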
12120

    
12121
  @staticmethod
12122
  def _ComputeBasicNodeData(node_cfg):
12123
    """Compute global node data.
12124

12125
    @rtype: dict
12126
    @returns: a dict of name: (node dict, node config)
12127

12128
    """
12129
    # fill in static (config-based) values
12130
    node_results = dict((ninfo.name, {
12131
      "tags": list(ninfo.GetTags()),
12132
      "primary_ip": ninfo.primary_ip,
12133
      "secondary_ip": ninfo.secondary_ip,
12134
      "offline": ninfo.offline,
12135
      "drained": ninfo.drained,
12136
      "master_candidate": ninfo.master_candidate,
12137
      "group": ninfo.group,
12138
      "master_capable": ninfo.master_capable,
12139
      "vm_capable": ninfo.vm_capable,
12140
      })
12141
      for ninfo in node_cfg.values())
12142

    
12143
    return node_results
12144

    
12145
  @staticmethod
12146
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12147
                              node_results):
12148
    """Compute global node data.
12149

12150
    @param node_results: the basic node structures as filled from the config
12151

12152
    """
12153
    # make a copy of the current dict
12154
    node_results = dict(node_results)
12155
    for nname, nresult in node_data.items():
12156
      assert nname in node_results, "Missing basic data for node %s" % nname
12157
      ninfo = node_cfg[nname]
12158

    
12159
      if not (ninfo.offline or ninfo.drained):
12160
        nresult.Raise("Can't get data for node %s" % nname)
12161
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12162
                                nname)
12163
        remote_info = nresult.payload
12164

    
12165
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
12166
                     'vg_size', 'vg_free', 'cpu_total']:
12167
          if attr not in remote_info:
12168
            raise errors.OpExecError("Node '%s' didn't return attribute"
12169
                                     " '%s'" % (nname, attr))
12170
          if not isinstance(remote_info[attr], int):
12171
            raise errors.OpExecError("Node '%s' returned invalid value"
12172
                                     " for '%s': %s" %
12173
                                     (nname, attr, remote_info[attr]))
12174
        # compute memory used by primary instances
12175
        i_p_mem = i_p_up_mem = 0
12176
        for iinfo, beinfo in i_list:
12177
          if iinfo.primary_node == nname:
12178
            i_p_mem += beinfo[constants.BE_MEMORY]
12179
            if iinfo.name not in node_iinfo[nname].payload:
12180
              i_used_mem = 0
12181
            else:
12182
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
12183
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12184
            remote_info['memory_free'] -= max(0, i_mem_diff)
12185

    
12186
            if iinfo.admin_up:
12187
              i_p_up_mem += beinfo[constants.BE_MEMORY]
12188

    
12189
        # compute memory used by instances
12190
        pnr_dyn = {
12191
          "total_memory": remote_info['memory_total'],
12192
          "reserved_memory": remote_info['memory_dom0'],
12193
          "free_memory": remote_info['memory_free'],
12194
          "total_disk": remote_info['vg_size'],
12195
          "free_disk": remote_info['vg_free'],
12196
          "total_cpus": remote_info['cpu_total'],
12197
          "i_pri_memory": i_p_mem,
12198
          "i_pri_up_memory": i_p_up_mem,
12199
          }
12200
        pnr_dyn.update(node_results[nname])
12201
        node_results[nname] = pnr_dyn
12202

    
12203
    return node_results
12204

    
12205
  @staticmethod
12206
  def _ComputeInstanceData(cluster_info, i_list):
12207
    """Compute global instance data.
12208

12209
    """
12210
    instance_data = {}
12211
    for iinfo, beinfo in i_list:
12212
      nic_data = []
12213
      for nic in iinfo.nics:
12214
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12215
        nic_dict = {
12216
          "mac": nic.mac,
12217
          "ip": nic.ip,
12218
          "mode": filled_params[constants.NIC_MODE],
12219
          "link": filled_params[constants.NIC_LINK],
12220
          }
12221
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12222
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12223
        nic_data.append(nic_dict)
12224
      pir = {
12225
        "tags": list(iinfo.GetTags()),
12226
        "admin_up": iinfo.admin_up,
12227
        "vcpus": beinfo[constants.BE_VCPUS],
12228
        "memory": beinfo[constants.BE_MEMORY],
12229
        "os": iinfo.os,
12230
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12231
        "nics": nic_data,
12232
        "disks": [{constants.IDISK_SIZE: dsk.size,
12233
                   constants.IDISK_MODE: dsk.mode}
12234
                  for dsk in iinfo.disks],
12235
        "disk_template": iinfo.disk_template,
12236
        "hypervisor": iinfo.hypervisor,
12237
        }
12238
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12239
                                                 pir["disks"])
12240
      instance_data[iinfo.name] = pir
12241

    
12242
    return instance_data
12243

    
12244
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

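  # Once _BuildInputData has added the "type" key, an allocation request
  # might look roughly like this (names and sizes invented; "disks" and
  # "nics" simply carry the dicts supplied by the calling LU):
  #
  #   request = {
  #     "type": constants.IALLOCATOR_MODE_ALLOC,
  #     "name": "instance2.example.com",
  #     "disk_template": "drbd",
  #     "tags": [],
  #     "os": "debootstrap+default",
  #     "vcpus": 1,
  #     "memory": 1024,
  #     "disks": [...],
  #     "disk_space_total": 5248,
  #     "nics": [...],
  #     "required_nodes": 2,
  #     "hypervisor": "xen-pvm",
  #     }
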
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
12299
                                 errors.ECODE_STATE)
12300

    
12301
    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

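  # The corresponding relocation request is much smaller; roughly
  # (invented values):
  #
  #   request = {
  #     "type": constants.IALLOCATOR_MODE_RELOC,
  #     "name": "instance3.example.com",
  #     "disk_space_total": 10368,
  #     "required_nodes": 1,
  #     "relocate_from": ["node2.example.com"],
  #     }
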
  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddMultiRelocate(self):
    """Get data for multi-relocate requests.

    """
    return {
      "instances": self.instances,
      "reloc_mode": self.reloc_mode,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

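  # The text handed to the iallocator script by Run() is one JSON document;
  # its top-level layout is roughly the following (the cluster-wide keys
  # are filled in by _ComputeClusterData, the "request" key by the
  # mode-specific methods above):
  #
  #   {
  #     "cluster_tags": [...],
  #     "enabled_hypervisors": [...],
  #     "nodegroups": {...},
  #     "nodes": {...},
  #     "instances": {...},
  #     "request": {...},
  #     ...
  #   }
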
  _STRING_LIST = ht.TListOf(ht.TString)
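  # Each _MODE_DATA entry is a tuple of: the method above that builds the
  # mode-specific request, the (key, validator) pairs that request must
  # contain, and the check later applied (via self._result_check) to the
  # script's "result" field in _ValidateResult.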
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_MRELOC:
      (_AddMultiRelocate, [
        ("instances", _STRING_LIST),
        ("reloc_mode", ht.TElemOf(constants.IALLOCATOR_MRELOC_MODES)),
        ("target_groups", _STRING_LIST),
        ],
       ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
         # pylint: disable-msg=E1101
         # Class '...' has no 'OP_ID' member
         "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                              opcodes.OpInstanceMigrate.OP_ID,
                              opcodes.OpInstanceReplaceDisks.OP_ID])
         })))),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
12421
      rdict["result"] = rdict["nodes"]
12422
      del rdict["nodes"]
12423

    
12424
    for key in "success", "info", "result":
12425
      if key not in rdict:
12426
        raise errors.OpExecError("Can't parse iallocator results:"
12427
                                 " missing key '%s'" % key)
12428
      setattr(self, key, rdict[key])
12429

    
12430
    if not self._result_check(self.result):
12431
      raise errors.OpExecError("Iallocator returned invalid result,"
12432
                               " expected %s, got %s" %
12433
                               (self._result_check, self.result),
12434
                               errors.ECODE_INVAL)
12435

    
12436
    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
12437
                     constants.IALLOCATOR_MODE_MEVAC):
12438
      node2group = dict((name, ndata["group"])
12439
                        for (name, ndata) in self.in_data["nodes"].items())
12440

    
12441
      fn = compat.partial(self._NodesToGroups, node2group,
12442
                          self.in_data["nodegroups"])
12443

    
12444
      if self.mode == constants.IALLOCATOR_MODE_RELOC:
12445
        assert self.relocate_from is not None
12446
        assert self.required_nodes == 1
12447

    
12448
        request_groups = fn(self.relocate_from)
12449
        result_groups = fn(rdict["result"])
12450

    
12451
        if result_groups != request_groups:
12452
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12453
                                   " differ from original groups (%s)" %
12454
                                   (utils.CommaJoin(result_groups),
12455
                                    utils.CommaJoin(request_groups)))
12456
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
12457
        request_groups = fn(self.evac_nodes)
12458
        for (instance_name, secnode) in self.result:
12459
          result_groups = fn([secnode])
12460
          if result_groups != request_groups:
12461
            raise errors.OpExecError("Iallocator returned new secondary node"
12462
                                     " '%s' (group '%s') for instance '%s'"
12463
                                     " which is not in original group '%s'" %
12464
                                     (secnode, utils.CommaJoin(result_groups),
12465
                                      instance_name,
12466
                                      utils.CommaJoin(request_groups)))
12467
      else:
12468
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
12469

    
12470
    self.out_data = rdict
12471

    
12472
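  # A well-formed reply from the script, once deserialized, is a dict along
  # these lines (illustrative; the shape of "result" depends on the mode,
  # here an allocation returning the chosen nodes):
  #
  #   {
  #     "success": True,
  #     "info": "allocation successful",
  #     "result": ["node1.example.com", "node2.example.com"],
  #   }
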
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
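
  # For example (invented data):
  #
  #   node2group = {"node1": "uuid-of-group1", "node2": "uuid-of-group2"}
  #   groups = {"uuid-of-group1": {"name": "default", ...}}
  #   _NodesToGroups(node2group, groups, ["node1", "node2", "node9"])
  #     => ["default", "uuid-of-group2"]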

    
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
12518
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12519
      for attr in ["memory", "disks", "disk_template",
12520
                   "os", "tags", "nics", "vcpus"]:
12521
        if not hasattr(self.op, attr):
12522
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12523
                                     attr, errors.ECODE_INVAL)
12524
      iname = self.cfg.ExpandInstanceName(self.op.name)
12525
      if iname is not None:
12526
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12527
                                   iname, errors.ECODE_EXISTS)
12528
      if not isinstance(self.op.nics, list):
12529
        raise errors.OpPrereqError("Invalid parameter 'nics'",
12530
                                   errors.ECODE_INVAL)
12531
      if not isinstance(self.op.disks, list):
12532
        raise errors.OpPrereqError("Invalid parameter 'disks'",
12533
                                   errors.ECODE_INVAL)
12534
      for row in self.op.disks:
12535
        if (not isinstance(row, dict) or
12536
            constants.IDISK_SIZE not in row or
12537
            not isinstance(row[constants.IDISK_SIZE], int) or
12538
            constants.IDISK_MODE not in row or
12539
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
12540
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
12541
                                     " parameter", errors.ECODE_INVAL)
12542
      if self.op.hypervisor is None:
12543
        self.op.hypervisor = self.cfg.GetHypervisorType()
12544
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12545
      fname = _ExpandInstanceName(self.cfg, self.op.name)
12546
      self.op.name = fname
12547
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12548
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12549
      if not hasattr(self.op, "evac_nodes"):
12550
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12551
                                   " opcode input", errors.ECODE_INVAL)
12552
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
12553
      if self.op.instances:
12554
        self.op.instances = _GetWantedInstances(self, self.op.instances)
12555
      else:
12556
        raise errors.OpPrereqError("Missing instances to relocate",
12557
                                   errors.ECODE_INVAL)
12558
    else:
12559
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12560
                                 self.op.mode, errors.ECODE_INVAL)
12561

    
12562
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12563
      if self.op.allocator is None:
12564
        raise errors.OpPrereqError("Missing allocator name",
12565
                                   errors.ECODE_INVAL)
12566
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12567
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
12568
                                 self.op.direction, errors.ECODE_INVAL)
12569

    
12570
  def Exec(self, feedback_fn):
12571
    """Run the allocator test.
12572

12573
    """
12574
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12575
      ial = IAllocator(self.cfg, self.rpc,
12576
                       mode=self.op.mode,
12577
                       name=self.op.name,
12578
                       memory=self.op.memory,
12579
                       disks=self.op.disks,
12580
                       disk_template=self.op.disk_template,
12581
                       os=self.op.os,
12582
                       tags=self.op.tags,
12583
                       nics=self.op.nics,
12584
                       vcpus=self.op.vcpus,
12585
                       hypervisor=self.op.hypervisor,
12586
                       )
12587
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12588
      ial = IAllocator(self.cfg, self.rpc,
12589
                       mode=self.op.mode,
12590
                       name=self.op.name,
12591
                       relocate_from=list(self.relocate_from),
12592
                       )
12593
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12594
      ial = IAllocator(self.cfg, self.rpc,
12595
                       mode=self.op.mode,
12596
                       evac_nodes=self.op.evac_nodes)
12597
    elif self.op.mode == constants.IALLOCATOR_MODE_MRELOC:
12598
      ial = IAllocator(self.cfg, self.rpc,
12599
                       mode=self.op.mode,
12600
                       instances=self.op.instances,
12601
                       reloc_mode=self.op.reloc_mode,
12602
                       target_groups=self.op.target_groups)
12603
    else:
12604
      raise errors.ProgrammerError("Uncatched mode %s in"
12605
                                   " LUTestAllocator.Exec", self.op.mode)
12606

    
12607
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)