lib/cmdlib.py @ revision ae1a845c
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable-msg=W0611
64

    
65

    
66
def _SupportsOob(cfg, node):
67
  """Tells if node supports OOB.
68

69
  @type cfg: L{config.ConfigWriter}
70
  @param cfg: The cluster configuration
71
  @type node: L{objects.Node}
72
  @param node: The node
73
  @return: The OOB script if supported or an empty string otherwise
74

75
  """
76
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
77

    
78

    
79
class ResultWithJobs:
80
  """Data container for LU results with jobs.
81

82
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
83
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
84
  contained in the C{jobs} attribute and include the job IDs in the opcode
85
  result.
86

87
  """
88
  def __init__(self, jobs, **kwargs):
89
    """Initializes this class.
90

91
    Additional return values can be specified as keyword arguments.
92

93
    @type jobs: list of lists of L{opcodes.OpCode}
94
    @param jobs: A list of lists of opcode objects
95

96
    """
97
    self.jobs = jobs
98
    self.other = kwargs
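# Illustrative sketch (not part of the original module): an LU could hand
# follow-up work to the processor by returning ResultWithJobs from Exec.
# The opcode and the extra keyword below are examples only:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpInstanceStartup(instance_name="instance1")]]
#     return ResultWithJobs(jobs, message="startup job submitted")
#
# mcpu.Processor._ProcessResult then submits the jobs and adds their IDs,
# together with the extra keyword values, to the opcode result.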
99

    
100

    
101
class LogicalUnit(object):
102
  """Logical Unit base class.
103

104
  Subclasses must follow these rules:
105
    - implement ExpandNames
106
    - implement CheckPrereq (except when tasklets are used)
107
    - implement Exec (except when tasklets are used)
108
    - implement BuildHooksEnv
109
    - implement BuildHooksNodes
110
    - redefine HPATH and HTYPE
111
    - optionally redefine their run requirements:
112
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113

114
  Note that all commands require root permissions.
115

116
  @ivar dry_run_result: the value (if any) that will be returned to the caller
117
      in dry-run mode (signalled by opcode dry_run parameter)
118

119
  """
120
  HPATH = None
121
  HTYPE = None
122
  REQ_BGL = True
123

    
124
  def __init__(self, processor, op, context, rpc):
125
    """Constructor for LogicalUnit.
126

127
    This needs to be overridden in derived classes in order to check op
128
    validity.
129

130
    """
131
    self.proc = processor
132
    self.op = op
133
    self.cfg = context.cfg
134
    self.glm = context.glm
135
    self.context = context
136
    self.rpc = rpc
137
    # Dicts used to declare locking needs to mcpu
138
    self.needed_locks = None
139
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
140
    self.add_locks = {}
141
    self.remove_locks = {}
142
    # Used to force good behavior when calling helper functions
143
    self.recalculate_locks = {}
144
    # logging
145
    self.Log = processor.Log # pylint: disable-msg=C0103
146
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
147
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
148
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
149
    # support for dry-run
150
    self.dry_run_result = None
151
    # support for generic debug attribute
152
    if (not hasattr(self.op, "debug_level") or
153
        not isinstance(self.op.debug_level, int)):
154
      self.op.debug_level = 0
155

    
156
    # Tasklets
157
    self.tasklets = None
158

    
159
    # Validate opcode parameters and set defaults
160
    self.op.Validate(True)
161

    
162
    self.CheckArguments()
163

    
164
  def CheckArguments(self):
165
    """Check syntactic validity for the opcode arguments.
166

167
    This method is for doing a simple syntactic check and ensure
168
    validity of opcode parameters, without any cluster-related
169
    checks. While the same can be accomplished in ExpandNames and/or
170
    CheckPrereq, doing these separate is better because:
171

172
      - ExpandNames is left as purely a lock-related function
173
      - CheckPrereq is run after we have acquired locks (and possible
174
        waited for them)
175

176
    The function is allowed to change the self.op attribute so that
177
    later methods no longer need to worry about missing parameters.
178

179
    """
180
    pass
181

    
182
  def ExpandNames(self):
183
    """Expand names for this LU.
184

185
    This method is called before starting to execute the opcode, and it should
186
    update all the parameters of the opcode to their canonical form (e.g. a
187
    short node name must be fully expanded after this method has successfully
188
    completed). This way locking, hooks, logging, etc. can work correctly.
189

190
    LUs which implement this method must also populate the self.needed_locks
191
    member, as a dict with lock levels as keys, and a list of needed lock names
192
    as values. Rules:
193

194
      - use an empty dict if you don't need any lock
195
      - if you don't need any lock at a particular level omit that level
196
      - don't put anything for the BGL level
197
      - if you want all locks at a level use locking.ALL_SET as a value
198

199
    If you need to share locks (rather than acquire them exclusively) at one
200
    level you can modify self.share_locks, setting a true value (usually 1) for
201
    that level. By default locks are not shared.
202

203
    This function can also define a list of tasklets, which then will be
204
    executed in order instead of the usual LU-level CheckPrereq and Exec
205
    functions, if those are not defined by the LU.
206

207
    Examples::
208

209
      # Acquire all nodes and one instance
210
      self.needed_locks = {
211
        locking.LEVEL_NODE: locking.ALL_SET,
212
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
213
      }
214
      # Acquire just two nodes
215
      self.needed_locks = {
216
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
217
      }
218
      # Acquire no locks
219
      self.needed_locks = {} # No, you can't leave it to the default value None
220

221
    """
222
    # The implementation of this method is mandatory only if the new LU is
223
    # concurrent, so that old LUs don't need to be changed all at the same
224
    # time.
225
    if self.REQ_BGL:
226
      self.needed_locks = {} # Exclusive LUs don't need locks.
227
    else:
228
      raise NotImplementedError
229

    
230
  def DeclareLocks(self, level):
231
    """Declare LU locking needs for a level
232

233
    While most LUs can just declare their locking needs at ExpandNames time,
234
    sometimes there's the need to calculate some locks after having acquired
235
    the ones before. This function is called just before acquiring locks at a
236
    particular level, but after acquiring the ones at lower levels, and permits
237
    such calculations. It can be used to modify self.needed_locks, and by
238
    default it does nothing.
239

240
    This function is only called if you have something already set in
241
    self.needed_locks for the level.
242

243
    @param level: Locking level which is going to be locked
244
    @type level: member of ganeti.locking.LEVELS
245

246
    """
247

    
248
  def CheckPrereq(self):
249
    """Check prerequisites for this LU.
250

251
    This method should check that the prerequisites for the execution
252
    of this LU are fulfilled. It can do internode communication, but
253
    it should be idempotent - no cluster or system changes are
254
    allowed.
255

256
    The method should raise errors.OpPrereqError in case something is
257
    not fulfilled. Its return value is ignored.
258

259
    This method should also update all the parameters of the opcode to
260
    their canonical form if it hasn't been done by ExpandNames before.
261

262
    """
263
    if self.tasklets is not None:
264
      for (idx, tl) in enumerate(self.tasklets):
265
        logging.debug("Checking prerequisites for tasklet %s/%s",
266
                      idx + 1, len(self.tasklets))
267
        tl.CheckPrereq()
268
    else:
269
      pass
270

    
271
  def Exec(self, feedback_fn):
272
    """Execute the LU.
273

274
    This method should implement the actual work. It should raise
275
    errors.OpExecError for failures that are somewhat dealt with in
276
    code, or expected.
277

278
    """
279
    if self.tasklets is not None:
280
      for (idx, tl) in enumerate(self.tasklets):
281
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
282
        tl.Exec(feedback_fn)
283
    else:
284
      raise NotImplementedError
285

    
286
  def BuildHooksEnv(self):
287
    """Build hooks environment for this LU.
288

289
    @rtype: dict
290
    @return: Dictionary containing the environment that will be used for
291
      running the hooks for this LU. The keys of the dict must not be prefixed
292
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
293
      will extend the environment with additional variables. If no environment
294
      should be defined, an empty dictionary should be returned (not C{None}).
295
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
296
      will not be called.
297

298
    """
299
    raise NotImplementedError
300

    
301
  def BuildHooksNodes(self):
302
    """Build list of nodes to run LU's hooks.
303

304
    @rtype: tuple; (list, list)
305
    @return: Tuple containing a list of node names on which the hook
306
      should run before the execution and a list of node names on which the
307
      hook should run after the execution. No nodes should be returned as an
308
      empty list (and not None).
309
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
310
      will not be called.
311

312
    """
313
    raise NotImplementedError
314

    
315
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
316
    """Notify the LU about the results of its hooks.
317

318
    This method is called every time a hooks phase is executed, and notifies
319
    the Logical Unit about the hooks' result. The LU can then use it to alter
320
    its result based on the hooks.  By default the method does nothing and the
321
    previous result is passed back unchanged but any LU can define it if it
322
    wants to use the local cluster hook-scripts somehow.
323

324
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
325
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
326
    @param hook_results: the results of the multi-node hooks rpc call
327
    @param feedback_fn: function used send feedback back to the caller
328
    @param lu_result: the previous Exec result this LU had, or None
329
        in the PRE phase
330
    @return: the new Exec result, based on the previous result
331
        and hook results
332

333
    """
334
    # API must be kept, thus we ignore the unused argument and could
335
    # be a function warnings
336
    # pylint: disable-msg=W0613,R0201
337
    return lu_result
338

    
339
  def _ExpandAndLockInstance(self):
340
    """Helper function to expand and lock an instance.
341

342
    Many LUs that work on an instance take its name in self.op.instance_name
343
    and need to expand it and then declare the expanded name for locking. This
344
    function does it, and then updates self.op.instance_name to the expanded
345
    name. It also initializes needed_locks as a dict, if this hasn't been done
346
    before.
347

348
    """
349
    if self.needed_locks is None:
350
      self.needed_locks = {}
351
    else:
352
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
353
        "_ExpandAndLockInstance called with instance-level locks set"
354
    self.op.instance_name = _ExpandInstanceName(self.cfg,
355
                                                self.op.instance_name)
356
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
357

    
358
  def _LockInstancesNodes(self, primary_only=False):
359
    """Helper function to declare instances' nodes for locking.
360

361
    This function should be called after locking one or more instances to lock
362
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
363
    with all primary or secondary nodes for instances already locked and
364
    present in self.needed_locks[locking.LEVEL_INSTANCE].
365

366
    It should be called from DeclareLocks, and for safety only works if
367
    self.recalculate_locks[locking.LEVEL_NODE] is set.
368

369
    In the future it may grow parameters to just lock some instance's nodes, or
370
    to just lock primaries or secondary nodes, if needed.
371

372
    It should be called in DeclareLocks in a way similar to::
373

374
      if level == locking.LEVEL_NODE:
375
        self._LockInstancesNodes()
376

377
    @type primary_only: boolean
378
    @param primary_only: only lock primary nodes of locked instances
379

380
    """
381
    assert locking.LEVEL_NODE in self.recalculate_locks, \
382
      "_LockInstancesNodes helper function called with no nodes to recalculate"
383

    
384
    # TODO: check if we've really been called with the instance locks held
385

    
386
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
387
    # future we might want to have different behaviors depending on the value
388
    # of self.recalculate_locks[locking.LEVEL_NODE]
389
    wanted_nodes = []
390
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
391
      instance = self.context.cfg.GetInstanceInfo(instance_name)
392
      wanted_nodes.append(instance.primary_node)
393
      if not primary_only:
394
        wanted_nodes.extend(instance.secondary_nodes)
395

    
396
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
397
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
398
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
399
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
400

    
401
    del self.recalculate_locks[locking.LEVEL_NODE]
402

    
403

    
404
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
405
  """Simple LU which runs no hooks.
406

407
  This LU is intended as a parent for other LogicalUnits which will
408
  run no hooks, in order to reduce duplicate code.
409

410
  """
411
  HPATH = None
412
  HTYPE = None
413

    
414
  def BuildHooksEnv(self):
415
    """Empty BuildHooksEnv for NoHooksLu.
416

417
    This just raises an error.
418

419
    """
420
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
421

    
422
  def BuildHooksNodes(self):
423
    """Empty BuildHooksNodes for NoHooksLU.
424

425
    """
426
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
427

    
428

    
429
class Tasklet:
430
  """Tasklet base class.
431

432
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
433
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
434
  tasklets know nothing about locks.
435

436
  Subclasses must follow these rules:
437
    - Implement CheckPrereq
438
    - Implement Exec
439

440
  """
441
  def __init__(self, lu):
442
    self.lu = lu
443

    
444
    # Shortcuts
445
    self.cfg = lu.cfg
446
    self.rpc = lu.rpc
447

    
448
  def CheckPrereq(self):
449
    """Check prerequisites for this tasklets.
450

451
    This method should check whether the prerequisites for the execution of
452
    this tasklet are fulfilled. It can do internode communication, but it
453
    should be idempotent - no cluster or system changes are allowed.
454

455
    The method should raise errors.OpPrereqError in case something is not
456
    fulfilled. Its return value is ignored.
457

458
    This method should also update all parameters to their canonical form if it
459
    hasn't been done before.
460

461
    """
462
    pass
463

    
464
  def Exec(self, feedback_fn):
465
    """Execute the tasklet.
466

467
    This method should implement the actual work. It should raise
468
    errors.OpExecError for failures that are somewhat dealt with in code, or
469
    expected.
470

471
    """
472
    raise NotImplementedError
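# Minimal sketch of a Tasklet subclass (illustrative only, not part of the
# original module); locking stays in the owning LU, the tasklet just checks
# prerequisites and executes:
#
#   class _ExampleNoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass  # nothing to verify in this no-op example
#
#     def Exec(self, feedback_fn):
#       feedback_fn("no-op tasklet run on behalf of %s" % self.lu.op.OP_ID)
#
# An LU would set self.tasklets = [_ExampleNoopTasklet(self)] in ExpandNames
# so that CheckPrereq and Exec are run per tasklet instead of per LU.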
473

    
474

    
475
class _QueryBase:
476
  """Base for query utility classes.
477

478
  """
479
  #: Attribute holding field definitions
480
  FIELDS = None
481

    
482
  def __init__(self, filter_, fields, use_locking):
483
    """Initializes this class.
484

485
    """
486
    self.use_locking = use_locking
487

    
488
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
489
                             namefield="name")
490
    self.requested_data = self.query.RequestedData()
491
    self.names = self.query.RequestedNames()
492

    
493
    # Sort only if no names were requested
494
    self.sort_by_name = not self.names
495

    
496
    self.do_locking = None
497
    self.wanted = None
498

    
499
  def _GetNames(self, lu, all_names, lock_level):
500
    """Helper function to determine names asked for in the query.
501

502
    """
503
    if self.do_locking:
504
      names = lu.glm.list_owned(lock_level)
505
    else:
506
      names = all_names
507

    
508
    if self.wanted == locking.ALL_SET:
509
      assert not self.names
510
      # caller didn't specify names, so ordering is not important
511
      return utils.NiceSort(names)
512

    
513
    # caller specified names and we must keep the same order
514
    assert self.names
515
    assert not self.do_locking or lu.glm.is_owned(lock_level)
516

    
517
    missing = set(self.wanted).difference(names)
518
    if missing:
519
      raise errors.OpExecError("Some items were removed before retrieving"
520
                               " their data: %s" % missing)
521

    
522
    # Return expanded names
523
    return self.wanted
524

    
525
  def ExpandNames(self, lu):
526
    """Expand names for this query.
527

528
    See L{LogicalUnit.ExpandNames}.
529

530
    """
531
    raise NotImplementedError()
532

    
533
  def DeclareLocks(self, lu, level):
534
    """Declare locks for this query.
535

536
    See L{LogicalUnit.DeclareLocks}.
537

538
    """
539
    raise NotImplementedError()
540

    
541
  def _GetQueryData(self, lu):
542
    """Collects all data for this query.
543

544
    @return: Query data object
545

546
    """
547
    raise NotImplementedError()
548

    
549
  def NewStyleQuery(self, lu):
550
    """Collect data and execute query.
551

552
    """
553
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
554
                                  sort_by_name=self.sort_by_name)
555

    
556
  def OldStyleQuery(self, lu):
557
    """Collect data and execute query.
558

559
    """
560
    return self.query.OldStyleQuery(self._GetQueryData(lu),
561
                                    sort_by_name=self.sort_by_name)
562

    
563

    
564
def _GetWantedNodes(lu, nodes):
565
  """Returns list of checked and expanded node names.
566

567
  @type lu: L{LogicalUnit}
568
  @param lu: the logical unit on whose behalf we execute
569
  @type nodes: list
570
  @param nodes: list of node names or None for all nodes
571
  @rtype: list
572
  @return: the list of nodes, sorted
573
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
574

575
  """
576
  if nodes:
577
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
578

    
579
  return utils.NiceSort(lu.cfg.GetNodeList())
580

    
581

    
582
def _GetWantedInstances(lu, instances):
583
  """Returns list of checked and expanded instance names.
584

585
  @type lu: L{LogicalUnit}
586
  @param lu: the logical unit on whose behalf we execute
587
  @type instances: list
588
  @param instances: list of instance names or None for all instances
589
  @rtype: list
590
  @return: the list of instances, sorted
591
  @raise errors.OpPrereqError: if the instances parameter is wrong type
592
  @raise errors.OpPrereqError: if any of the passed instances is not found
593

594
  """
595
  if instances:
596
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
597
  else:
598
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
599
  return wanted
600

    
601

    
602
def _GetUpdatedParams(old_params, update_dict,
603
                      use_default=True, use_none=False):
604
  """Return the new version of a parameter dictionary.
605

606
  @type old_params: dict
607
  @param old_params: old parameters
608
  @type update_dict: dict
609
  @param update_dict: dict containing new parameter values, or
610
      constants.VALUE_DEFAULT to reset the parameter to its default
611
      value
612
  @type use_default: boolean
613
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
614
      values as 'to be deleted' values
615
  @type use_none: boolean
616
  @param use_none: whether to recognise C{None} values as 'to be
617
      deleted' values
618
  @rtype: dict
619
  @return: the new parameter dictionary
620

621
  """
622
  params_copy = copy.deepcopy(old_params)
623
  for key, val in update_dict.iteritems():
624
    if ((use_default and val == constants.VALUE_DEFAULT) or
625
        (use_none and val is None)):
626
      try:
627
        del params_copy[key]
628
      except KeyError:
629
        pass
630
    else:
631
      params_copy[key] = val
632
  return params_copy
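# Usage sketch (values are illustrative):
#
#   old = {"vcpus": 2, "memory": 512}
#   upd = {"vcpus": 4, "memory": constants.VALUE_DEFAULT}
#   _GetUpdatedParams(old, upd)  # -> {"vcpus": 4}
#
# With use_default=True (the default), VALUE_DEFAULT drops "memory" so the
# higher-level default applies again; use_none=True treats None the same way.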
633

    
634

    
635
def _ReleaseLocks(lu, level, names=None, keep=None):
636
  """Releases locks owned by an LU.
637

638
  @type lu: L{LogicalUnit}
639
  @param level: Lock level
640
  @type names: list or None
641
  @param names: Names of locks to release
642
  @type keep: list or None
643
  @param keep: Names of locks to retain
644

645
  """
646
  assert not (keep is not None and names is not None), \
647
         "Only one of the 'names' and the 'keep' parameters can be given"
648

    
649
  if names is not None:
650
    should_release = names.__contains__
651
  elif keep:
652
    should_release = lambda name: name not in keep
653
  else:
654
    should_release = None
655

    
656
  if should_release:
657
    retain = []
658
    release = []
659

    
660
    # Determine which locks to release
661
    for name in lu.glm.list_owned(level):
662
      if should_release(name):
663
        release.append(name)
664
      else:
665
        retain.append(name)
666

    
667
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
668

    
669
    # Release just some locks
670
    lu.glm.release(level, names=release)
671

    
672
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
673
  else:
674
    # Release everything
675
    lu.glm.release(level)
676

    
677
    assert not lu.glm.is_owned(level), "No locks should be owned"
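# Usage sketch from inside an LU (node names are illustrative): keep only the
# locks that are still needed and release the rest of the level:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=["node1.example.com"])
#
# Passing names=[...] instead releases exactly the listed locks; "names" and
# "keep" are mutually exclusive, and giving neither releases the whole level.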
678

    
679

    
680
def _MapInstanceDisksToNodes(instances):
681
  """Creates a map from (node, volume) to instance name.
682

683
  @type instances: list of L{objects.Instance}
684
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
685

686
  """
687
  return dict(((node, vol), inst.name)
688
              for inst in instances
689
              for (node, vols) in inst.MapLVsByNode().items()
690
              for vol in vols)
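# Shape of the result (node and volume names are illustrative):
#
#   {("node1.example.com", "xenvg/disk0"): "instance1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "instance1.example.com"}
#
# i.e. one entry per (node, logical volume) pair used by each instance.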
691

    
692

    
693
def _RunPostHook(lu, node_name):
694
  """Runs the post-hook for an opcode on a single node.
695

696
  """
697
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
698
  try:
699
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
700
  except:
701
    # pylint: disable-msg=W0702
702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
703

    
704

    
705
def _CheckOutputFields(static, dynamic, selected):
706
  """Checks whether all selected fields are valid.
707

708
  @type static: L{utils.FieldSet}
709
  @param static: static fields set
710
  @type dynamic: L{utils.FieldSet}
711
  @param dynamic: dynamic fields set
712

713
  """
714
  f = utils.FieldSet()
715
  f.Extend(static)
716
  f.Extend(dynamic)
717

    
718
  delta = f.NonMatching(selected)
719
  if delta:
720
    raise errors.OpPrereqError("Unknown output fields selected: %s"
721
                               % ",".join(delta), errors.ECODE_INVAL)
722

    
723

    
724
def _CheckGlobalHvParams(params):
725
  """Validates that given hypervisor params are not global ones.
726

727
  This will ensure that instances don't get customised versions of
728
  global params.
729

730
  """
731
  used_globals = constants.HVC_GLOBALS.intersection(params)
732
  if used_globals:
733
    msg = ("The following hypervisor parameters are global and cannot"
734
           " be customized at instance level, please modify them at"
735
           " cluster level: %s" % utils.CommaJoin(used_globals))
736
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
737

    
738

    
739
def _CheckNodeOnline(lu, node, msg=None):
740
  """Ensure that a given node is online.
741

742
  @param lu: the LU on behalf of which we make the check
743
  @param node: the node to check
744
  @param msg: if passed, should be a message to replace the default one
745
  @raise errors.OpPrereqError: if the node is offline
746

747
  """
748
  if msg is None:
749
    msg = "Can't use offline node"
750
  if lu.cfg.GetNodeInfo(node).offline:
751
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
752

    
753

    
754
def _CheckNodeNotDrained(lu, node):
755
  """Ensure that a given node is not drained.
756

757
  @param lu: the LU on behalf of which we make the check
758
  @param node: the node to check
759
  @raise errors.OpPrereqError: if the node is drained
760

761
  """
762
  if lu.cfg.GetNodeInfo(node).drained:
763
    raise errors.OpPrereqError("Can't use drained node %s" % node,
764
                               errors.ECODE_STATE)
765

    
766

    
767
def _CheckNodeVmCapable(lu, node):
768
  """Ensure that a given node is vm capable.
769

770
  @param lu: the LU on behalf of which we make the check
771
  @param node: the node to check
772
  @raise errors.OpPrereqError: if the node is not vm capable
773

774
  """
775
  if not lu.cfg.GetNodeInfo(node).vm_capable:
776
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
777
                               errors.ECODE_STATE)
778

    
779

    
780
def _CheckNodeHasOS(lu, node, os_name, force_variant):
781
  """Ensure that a node supports a given OS.
782

783
  @param lu: the LU on behalf of which we make the check
784
  @param node: the node to check
785
  @param os_name: the OS to query about
786
  @param force_variant: whether to ignore variant errors
787
  @raise errors.OpPrereqError: if the node is not supporting the OS
788

789
  """
790
  result = lu.rpc.call_os_get(node, os_name)
791
  result.Raise("OS '%s' not in supported OS list for node %s" %
792
               (os_name, node),
793
               prereq=True, ecode=errors.ECODE_INVAL)
794
  if not force_variant:
795
    _CheckOSVariant(result.payload, os_name)
796

    
797

    
798
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
799
  """Ensure that a node has the given secondary ip.
800

801
  @type lu: L{LogicalUnit}
802
  @param lu: the LU on behalf of which we make the check
803
  @type node: string
804
  @param node: the node to check
805
  @type secondary_ip: string
806
  @param secondary_ip: the ip to check
807
  @type prereq: boolean
808
  @param prereq: whether to throw a prerequisite or an execute error
809
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
810
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
811

812
  """
813
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
814
  result.Raise("Failure checking secondary ip on node %s" % node,
815
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
816
  if not result.payload:
817
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
818
           " please fix and re-run this command" % secondary_ip)
819
    if prereq:
820
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
821
    else:
822
      raise errors.OpExecError(msg)
823

    
824

    
825
def _GetClusterDomainSecret():
826
  """Reads the cluster domain secret.
827

828
  """
829
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
830
                               strict=True)
831

    
832

    
833
def _CheckInstanceDown(lu, instance, reason):
834
  """Ensure that an instance is not running."""
835
  if instance.admin_up:
836
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
837
                               (instance.name, reason), errors.ECODE_STATE)
838

    
839
  pnode = instance.primary_node
840
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
841
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
842
              prereq=True, ecode=errors.ECODE_ENVIRON)
843

    
844
  if instance.name in ins_l.payload:
845
    raise errors.OpPrereqError("Instance %s is running, %s" %
846
                               (instance.name, reason), errors.ECODE_STATE)
847

    
848

    
849
def _ExpandItemName(fn, name, kind):
850
  """Expand an item name.
851

852
  @param fn: the function to use for expansion
853
  @param name: requested item name
854
  @param kind: text description ('Node' or 'Instance')
855
  @return: the resolved (full) name
856
  @raise errors.OpPrereqError: if the item is not found
857

858
  """
859
  full_name = fn(name)
860
  if full_name is None:
861
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
862
                               errors.ECODE_NOENT)
863
  return full_name
864

    
865

    
866
def _ExpandNodeName(cfg, name):
867
  """Wrapper over L{_ExpandItemName} for nodes."""
868
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
869

    
870

    
871
def _ExpandInstanceName(cfg, name):
872
  """Wrapper over L{_ExpandItemName} for instance."""
873
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
874

    
875

    
876
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
877
                          memory, vcpus, nics, disk_template, disks,
878
                          bep, hvp, hypervisor_name, tags):
879
  """Builds instance related env variables for hooks
880

881
  This builds the hook environment from individual variables.
882

883
  @type name: string
884
  @param name: the name of the instance
885
  @type primary_node: string
886
  @param primary_node: the name of the instance's primary node
887
  @type secondary_nodes: list
888
  @param secondary_nodes: list of secondary nodes as strings
889
  @type os_type: string
890
  @param os_type: the name of the instance's OS
891
  @type status: boolean
892
  @param status: the should_run status of the instance
893
  @type memory: string
894
  @param memory: the memory size of the instance
895
  @type vcpus: string
896
  @param vcpus: the count of VCPUs the instance has
897
  @type nics: list
898
  @param nics: list of tuples (ip, mac, mode, link) representing
899
      the NICs the instance has
900
  @type disk_template: string
901
  @param disk_template: the disk template of the instance
902
  @type disks: list
903
  @param disks: the list of (size, mode) pairs
904
  @type bep: dict
905
  @param bep: the backend parameters for the instance
906
  @type hvp: dict
907
  @param hvp: the hypervisor parameters for the instance
908
  @type hypervisor_name: string
909
  @param hypervisor_name: the hypervisor for the instance
910
  @type tags: list
911
  @param tags: list of instance tags as strings
912
  @rtype: dict
913
  @return: the hook environment for this instance
914

915
  """
916
  if status:
917
    str_status = "up"
918
  else:
919
    str_status = "down"
920
  env = {
921
    "OP_TARGET": name,
922
    "INSTANCE_NAME": name,
923
    "INSTANCE_PRIMARY": primary_node,
924
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
925
    "INSTANCE_OS_TYPE": os_type,
926
    "INSTANCE_STATUS": str_status,
927
    "INSTANCE_MEMORY": memory,
928
    "INSTANCE_VCPUS": vcpus,
929
    "INSTANCE_DISK_TEMPLATE": disk_template,
930
    "INSTANCE_HYPERVISOR": hypervisor_name,
931
  }
932

    
933
  if nics:
934
    nic_count = len(nics)
935
    for idx, (ip, mac, mode, link) in enumerate(nics):
936
      if ip is None:
937
        ip = ""
938
      env["INSTANCE_NIC%d_IP" % idx] = ip
939
      env["INSTANCE_NIC%d_MAC" % idx] = mac
940
      env["INSTANCE_NIC%d_MODE" % idx] = mode
941
      env["INSTANCE_NIC%d_LINK" % idx] = link
942
      if mode == constants.NIC_MODE_BRIDGED:
943
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
944
  else:
945
    nic_count = 0
946

    
947
  env["INSTANCE_NIC_COUNT"] = nic_count
948

    
949
  if disks:
950
    disk_count = len(disks)
951
    for idx, (size, mode) in enumerate(disks):
952
      env["INSTANCE_DISK%d_SIZE" % idx] = size
953
      env["INSTANCE_DISK%d_MODE" % idx] = mode
954
  else:
955
    disk_count = 0
956

    
957
  env["INSTANCE_DISK_COUNT"] = disk_count
958

    
959
  if not tags:
960
    tags = []
961

    
962
  env["INSTANCE_TAGS"] = " ".join(tags)
963

    
964
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
965
    for key, value in source.items():
966
      env["INSTANCE_%s_%s" % (kind, key)] = value
967

    
968
  return env
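# Sketch of the resulting environment (values are illustrative): for an
# instance with one NIC and one disk the returned dict contains, among
# others,
#
#   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES, INSTANCE_STATUS,
#   INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE,
#   INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE, INSTANCE_DISK0_MODE,
#   plus INSTANCE_BE_<param> and INSTANCE_HV_<param> entries.
#
# The hooks runner later prefixes every key with "GANETI_".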
969

    
970

    
971
def _NICListToTuple(lu, nics):
972
  """Build a list of nic information tuples.
973

974
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
975
  value in LUInstanceQueryData.
976

977
  @type lu:  L{LogicalUnit}
978
  @param lu: the logical unit on whose behalf we execute
979
  @type nics: list of L{objects.NIC}
980
  @param nics: list of nics to convert to hooks tuples
981

982
  """
983
  hooks_nics = []
984
  cluster = lu.cfg.GetClusterInfo()
985
  for nic in nics:
986
    ip = nic.ip
987
    mac = nic.mac
988
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
989
    mode = filled_params[constants.NIC_MODE]
990
    link = filled_params[constants.NIC_LINK]
991
    hooks_nics.append((ip, mac, mode, link))
992
  return hooks_nics
993

    
994

    
995
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
996
  """Builds instance related env variables for hooks from an object.
997

998
  @type lu: L{LogicalUnit}
999
  @param lu: the logical unit on whose behalf we execute
1000
  @type instance: L{objects.Instance}
1001
  @param instance: the instance for which we should build the
1002
      environment
1003
  @type override: dict
1004
  @param override: dictionary with key/values that will override
1005
      our values
1006
  @rtype: dict
1007
  @return: the hook environment dictionary
1008

1009
  """
1010
  cluster = lu.cfg.GetClusterInfo()
1011
  bep = cluster.FillBE(instance)
1012
  hvp = cluster.FillHV(instance)
1013
  args = {
1014
    "name": instance.name,
1015
    "primary_node": instance.primary_node,
1016
    "secondary_nodes": instance.secondary_nodes,
1017
    "os_type": instance.os,
1018
    "status": instance.admin_up,
1019
    "memory": bep[constants.BE_MEMORY],
1020
    "vcpus": bep[constants.BE_VCPUS],
1021
    "nics": _NICListToTuple(lu, instance.nics),
1022
    "disk_template": instance.disk_template,
1023
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1024
    "bep": bep,
1025
    "hvp": hvp,
1026
    "hypervisor_name": instance.hypervisor,
1027
    "tags": instance.tags,
1028
  }
1029
  if override:
1030
    args.update(override)
1031
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1032

    
1033

    
1034
def _AdjustCandidatePool(lu, exceptions):
1035
  """Adjust the candidate pool after node operations.
1036

1037
  """
1038
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1039
  if mod_list:
1040
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1041
               utils.CommaJoin(node.name for node in mod_list))
1042
    for name in mod_list:
1043
      lu.context.ReaddNode(name)
1044
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1045
  if mc_now > mc_max:
1046
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1047
               (mc_now, mc_max))
1048

    
1049

    
1050
def _DecideSelfPromotion(lu, exceptions=None):
1051
  """Decide whether I should promote myself as a master candidate.
1052

1053
  """
1054
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1055
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1056
  # the new node will increase mc_max by one, so:
1057
  mc_should = min(mc_should + 1, cp_size)
1058
  return mc_now < mc_should
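# Worked example (numbers are illustrative): with candidate_pool_size=10,
# mc_now=3 and mc_should=3, the node being added raises the target to
# min(3 + 1, 10) = 4; since 3 < 4 the function returns True (promote).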
1059

    
1060

    
1061
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1062
  """Check that the brigdes needed by a list of nics exist.
1063

1064
  """
1065
  cluster = lu.cfg.GetClusterInfo()
1066
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1067
  brlist = [params[constants.NIC_LINK] for params in paramslist
1068
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1069
  if brlist:
1070
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1071
    result.Raise("Error checking bridges on destination node '%s'" %
1072
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1073

    
1074

    
1075
def _CheckInstanceBridgesExist(lu, instance, node=None):
1076
  """Check that the brigdes needed by an instance exist.
1077

1078
  """
1079
  if node is None:
1080
    node = instance.primary_node
1081
  _CheckNicsBridgesExist(lu, instance.nics, node)
1082

    
1083

    
1084
def _CheckOSVariant(os_obj, name):
1085
  """Check whether an OS name conforms to the os variants specification.
1086

1087
  @type os_obj: L{objects.OS}
1088
  @param os_obj: OS object to check
1089
  @type name: string
1090
  @param name: OS name passed by the user, to check for validity
1091

1092
  """
1093
  if not os_obj.supported_variants:
1094
    return
1095
  variant = objects.OS.GetVariant(name)
1096
  if not variant:
1097
    raise errors.OpPrereqError("OS name must include a variant",
1098
                               errors.ECODE_INVAL)
1099

    
1100
  if variant not in os_obj.supported_variants:
1101
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1102

    
1103

    
1104
def _GetNodeInstancesInner(cfg, fn):
1105
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1106

    
1107

    
1108
def _GetNodeInstances(cfg, node_name):
1109
  """Returns a list of all primary and secondary instances on a node.
1110

1111
  """
1112

    
1113
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1114

    
1115

    
1116
def _GetNodePrimaryInstances(cfg, node_name):
1117
  """Returns primary instances on a node.
1118

1119
  """
1120
  return _GetNodeInstancesInner(cfg,
1121
                                lambda inst: node_name == inst.primary_node)
1122

    
1123

    
1124
def _GetNodeSecondaryInstances(cfg, node_name):
1125
  """Returns secondary instances on a node.
1126

1127
  """
1128
  return _GetNodeInstancesInner(cfg,
1129
                                lambda inst: node_name in inst.secondary_nodes)
1130

    
1131

    
1132
def _GetStorageTypeArgs(cfg, storage_type):
1133
  """Returns the arguments for a storage type.
1134

1135
  """
1136
  # Special case for file storage
1137
  if storage_type == constants.ST_FILE:
1138
    # storage.FileStorage wants a list of storage directories
1139
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1140

    
1141
  return []
1142

    
1143

    
1144
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1145
  faulty = []
1146

    
1147
  for dev in instance.disks:
1148
    cfg.SetDiskID(dev, node_name)
1149

    
1150
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1151
  result.Raise("Failed to get disk status from node %s" % node_name,
1152
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1153

    
1154
  for idx, bdev_status in enumerate(result.payload):
1155
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1156
      faulty.append(idx)
1157

    
1158
  return faulty
1159

    
1160

    
1161
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1162
  """Check the sanity of iallocator and node arguments and use the
1163
  cluster-wide iallocator if appropriate.
1164

1165
  Check that at most one of (iallocator, node) is specified. If none is
1166
  specified, then the LU's opcode's iallocator slot is filled with the
1167
  cluster-wide default iallocator.
1168

1169
  @type iallocator_slot: string
1170
  @param iallocator_slot: the name of the opcode iallocator slot
1171
  @type node_slot: string
1172
  @param node_slot: the name of the opcode target node slot
1173

1174
  """
1175
  node = getattr(lu.op, node_slot, None)
1176
  iallocator = getattr(lu.op, iallocator_slot, None)
1177

    
1178
  if node is not None and iallocator is not None:
1179
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1180
                               errors.ECODE_INVAL)
1181
  elif node is None and iallocator is None:
1182
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1183
    if default_iallocator:
1184
      setattr(lu.op, iallocator_slot, default_iallocator)
1185
    else:
1186
      raise errors.OpPrereqError("No iallocator or node given and no"
1187
                                 " cluster-wide default iallocator found;"
1188
                                 " please specify either an iallocator or a"
1189
                                 " node, or set a cluster-wide default"
1190
                                 " iallocator")
1191

    
1192

    
1193
class LUClusterPostInit(LogicalUnit):
1194
  """Logical unit for running hooks after cluster initialization.
1195

1196
  """
1197
  HPATH = "cluster-init"
1198
  HTYPE = constants.HTYPE_CLUSTER
1199

    
1200
  def BuildHooksEnv(self):
1201
    """Build hooks env.
1202

1203
    """
1204
    return {
1205
      "OP_TARGET": self.cfg.GetClusterName(),
1206
      }
1207

    
1208
  def BuildHooksNodes(self):
1209
    """Build hooks nodes.
1210

1211
    """
1212
    return ([], [self.cfg.GetMasterNode()])
1213

    
1214
  def Exec(self, feedback_fn):
1215
    """Nothing to do.
1216

1217
    """
1218
    return True
1219

    
1220

    
1221
class LUClusterDestroy(LogicalUnit):
1222
  """Logical unit for destroying the cluster.
1223

1224
  """
1225
  HPATH = "cluster-destroy"
1226
  HTYPE = constants.HTYPE_CLUSTER
1227

    
1228
  def BuildHooksEnv(self):
1229
    """Build hooks env.
1230

1231
    """
1232
    return {
1233
      "OP_TARGET": self.cfg.GetClusterName(),
1234
      }
1235

    
1236
  def BuildHooksNodes(self):
1237
    """Build hooks nodes.
1238

1239
    """
1240
    return ([], [])
1241

    
1242
  def CheckPrereq(self):
1243
    """Check prerequisites.
1244

1245
    This checks whether the cluster is empty.
1246

1247
    Any errors are signaled by raising errors.OpPrereqError.
1248

1249
    """
1250
    master = self.cfg.GetMasterNode()
1251

    
1252
    nodelist = self.cfg.GetNodeList()
1253
    if len(nodelist) != 1 or nodelist[0] != master:
1254
      raise errors.OpPrereqError("There are still %d node(s) in"
1255
                                 " this cluster." % (len(nodelist) - 1),
1256
                                 errors.ECODE_INVAL)
1257
    instancelist = self.cfg.GetInstanceList()
1258
    if instancelist:
1259
      raise errors.OpPrereqError("There are still %d instance(s) in"
1260
                                 " this cluster." % len(instancelist),
1261
                                 errors.ECODE_INVAL)
1262

    
1263
  def Exec(self, feedback_fn):
1264
    """Destroys the cluster.
1265

1266
    """
1267
    master = self.cfg.GetMasterNode()
1268

    
1269
    # Run post hooks on master node before it's removed
1270
    _RunPostHook(self, master)
1271

    
1272
    result = self.rpc.call_node_stop_master(master, False)
1273
    result.Raise("Could not disable the master role")
1274

    
1275
    return master
1276

    
1277

    
1278
def _VerifyCertificate(filename):
1279
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1280

1281
  @type filename: string
1282
  @param filename: Path to PEM file
1283

1284
  """
1285
  try:
1286
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1287
                                           utils.ReadFile(filename))
1288
  except Exception, err: # pylint: disable-msg=W0703
1289
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1290
            "Failed to load X509 certificate %s: %s" % (filename, err))
1291

    
1292
  (errcode, msg) = \
1293
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1294
                                constants.SSL_CERT_EXPIRATION_ERROR)
1295

    
1296
  if msg:
1297
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1298
  else:
1299
    fnamemsg = None
1300

    
1301
  if errcode is None:
1302
    return (None, fnamemsg)
1303
  elif errcode == utils.CERT_WARNING:
1304
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1305
  elif errcode == utils.CERT_ERROR:
1306
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1307

    
1308
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1309

    
1310

    
1311
def _GetAllHypervisorParameters(cluster, instances):
1312
  """Compute the set of all hypervisor parameters.
1313

1314
  @type cluster: L{objects.Cluster}
1315
  @param cluster: the cluster object
1316
  @param instances: list of L{objects.Instance}
1317
  @param instances: additional instances from which to obtain parameters
1318
  @rtype: list of (origin, hypervisor, parameters)
1319
  @return: a list with all parameters found, indicating the hypervisor they
1320
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1321

1322
  """
1323
  hvp_data = []
1324

    
1325
  for hv_name in cluster.enabled_hypervisors:
1326
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1327

    
1328
  for os_name, os_hvp in cluster.os_hvp.items():
1329
    for hv_name, hv_params in os_hvp.items():
1330
      if hv_params:
1331
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1332
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1333

    
1334
  # TODO: collapse identical parameter values in a single one
1335
  for instance in instances:
1336
    if instance.hvparams:
1337
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1338
                       cluster.FillHV(instance)))
1339

    
1340
  return hvp_data
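# Shape of the returned list (entries are illustrative):
#
#   [("cluster", "xen-pvm", {... cluster defaults ...}),
#    ("os debian-edgy", "xen-pvm", {... defaults with OS overrides ...}),
#    ("instance instance1.example.com", "xen-pvm", {... FillHV result ...})]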
1341

    
1342

    
1343
class _VerifyErrors(object):
1344
  """Mix-in for cluster/group verify LUs.
1345

1346
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1347
  self.op and self._feedback_fn to be available.)
1348

1349
  """
1350
  TCLUSTER = "cluster"
1351
  TNODE = "node"
1352
  TINSTANCE = "instance"
1353

    
1354
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1355
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1356
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1357
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1358
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1359
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1360
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1361
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1362
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1363
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1364
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1365
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1366
  ENODEDRBD = (TNODE, "ENODEDRBD")
1367
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1368
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1369
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1370
  ENODEHV = (TNODE, "ENODEHV")
1371
  ENODELVM = (TNODE, "ENODELVM")
1372
  ENODEN1 = (TNODE, "ENODEN1")
1373
  ENODENET = (TNODE, "ENODENET")
1374
  ENODEOS = (TNODE, "ENODEOS")
1375
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1376
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1377
  ENODERPC = (TNODE, "ENODERPC")
1378
  ENODESSH = (TNODE, "ENODESSH")
1379
  ENODEVERSION = (TNODE, "ENODEVERSION")
1380
  ENODESETUP = (TNODE, "ENODESETUP")
1381
  ENODETIME = (TNODE, "ENODETIME")
1382
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1383

    
1384
  ETYPE_FIELD = "code"
1385
  ETYPE_ERROR = "ERROR"
1386
  ETYPE_WARNING = "WARNING"
1387

    
1388
  def _Error(self, ecode, item, msg, *args, **kwargs):
1389
    """Format an error message.
1390

1391
    Based on the opcode's error_codes parameter, either format a
1392
    parseable error code, or a simpler error string.
1393

1394
    This must be called only from Exec and functions called from Exec.
1395

1396
    """
1397
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1398
    itype, etxt = ecode
1399
    # first complete the msg
1400
    if args:
1401
      msg = msg % args
1402
    # then format the whole message
1403
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1404
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1405
    else:
1406
      if item:
1407
        item = " " + item
1408
      else:
1409
        item = ""
1410
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1411
    # and finally report it via the feedback_fn
1412
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1413

    
1414
  def _ErrorIf(self, cond, *args, **kwargs):
1415
    """Log an error message if the passed condition is True.
1416

1417
    """
1418
    cond = (bool(cond)
1419
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1420
    if cond:
1421
      self._Error(*args, **kwargs)
1422
    # do not mark the operation as failed for WARN cases only
1423
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1424
      self.bad = self.bad or cond
1425

    
1426

    
1427
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1428
  """Verifies the cluster config.
1429

1430
  """
1431
  REQ_BGL = True
1432

    
1433
  def _VerifyHVP(self, hvp_data):
1434
    """Verifies locally the syntax of the hypervisor parameters.
1435

1436
    """
1437
    for item, hv_name, hv_params in hvp_data:
1438
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1439
             (item, hv_name))
1440
      try:
1441
        hv_class = hypervisor.GetHypervisor(hv_name)
1442
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1443
        hv_class.CheckParameterSyntax(hv_params)
1444
      except errors.GenericError, err:
1445
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1446

    
1447
  def ExpandNames(self):
1448
    # Information can be safely retrieved as the BGL is acquired in exclusive
1449
    # mode
1450
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1451
    self.all_node_info = self.cfg.GetAllNodesInfo()
1452
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1453
    self.needed_locks = {}
1454

    
1455
  def Exec(self, feedback_fn):
1456
    """Verify integrity of cluster, performing various test on nodes.
1457

1458
    """
1459
    self.bad = False
1460
    self._feedback_fn = feedback_fn
1461

    
1462
    feedback_fn("* Verifying cluster config")
1463

    
1464
    for msg in self.cfg.VerifyConfig():
1465
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1466

    
1467
    feedback_fn("* Verifying cluster certificate files")
1468

    
1469
    for cert_filename in constants.ALL_CERT_FILES:
1470
      (errcode, msg) = _VerifyCertificate(cert_filename)
1471
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1472

    
1473
    feedback_fn("* Verifying hypervisor parameters")
1474

    
1475
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1476
                                                self.all_inst_info.values()))
1477

    
1478
    feedback_fn("* Verifying all nodes belong to an existing group")
1479

    
1480
    # We do this verification here because, should this bogus circumstance
1481
    # occur, it would never be caught by VerifyGroup, which only acts on
1482
    # nodes/instances reachable from existing node groups.
1483

    
1484
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1485
                         if node.group not in self.all_group_info)
1486

    
1487
    dangling_instances = {}
1488
    no_node_instances = []
1489

    
1490
    for inst in self.all_inst_info.values():
1491
      if inst.primary_node in dangling_nodes:
1492
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1493
      elif inst.primary_node not in self.all_node_info:
1494
        no_node_instances.append(inst.name)
1495

    
1496
    pretty_dangling = [
1497
        "%s (%s)" %
1498
        (node.name,
1499
         utils.CommaJoin(dangling_instances.get(node.name,
1500
                                                ["no instances"])))
1501
        for node in dangling_nodes]
1502

    
1503
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1504
                  "the following nodes (and their instances) belong to a non"
1505
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1506

    
1507
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1508
                  "the following instances have a non-existing primary-node:"
1509
                  " %s", utils.CommaJoin(no_node_instances))
1510

    
1511
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1512

    
1513

    
1514
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1515
  """Verifies the status of a node group.
1516

1517
  """
1518
  HPATH = "cluster-verify"
1519
  HTYPE = constants.HTYPE_CLUSTER
1520
  REQ_BGL = False
1521

    
1522
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1523

    
1524
  class NodeImage(object):
1525
    """A class representing the logical and physical status of a node.
1526

1527
    @type name: string
1528
    @ivar name: the node name to which this object refers
1529
    @ivar volumes: a structure as returned from
1530
        L{ganeti.backend.GetVolumeList} (runtime)
1531
    @ivar instances: a list of running instances (runtime)
1532
    @ivar pinst: list of configured primary instances (config)
1533
    @ivar sinst: list of configured secondary instances (config)
1534
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1535
        instances for which this node is secondary (config)
1536
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1537
    @ivar dfree: free disk, as reported by the node (runtime)
1538
    @ivar offline: the offline status (config)
1539
    @type rpc_fail: boolean
1540
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1541
        not whether the individual keys were correct) (runtime)
1542
    @type lvm_fail: boolean
1543
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1544
    @type hyp_fail: boolean
1545
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1546
    @type ghost: boolean
1547
    @ivar ghost: whether this is a known node or not (config)
1548
    @type os_fail: boolean
1549
    @ivar os_fail: whether the RPC call didn't return valid OS data
1550
    @type oslist: list
1551
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1552
    @type vm_capable: boolean
1553
    @ivar vm_capable: whether the node can host instances
1554

1555
    """
1556
    def __init__(self, offline=False, name=None, vm_capable=True):
1557
      self.name = name
1558
      self.volumes = {}
1559
      self.instances = []
1560
      self.pinst = []
1561
      self.sinst = []
1562
      self.sbp = {}
1563
      self.mfree = 0
1564
      self.dfree = 0
1565
      self.offline = offline
1566
      self.vm_capable = vm_capable
1567
      self.rpc_fail = False
1568
      self.lvm_fail = False
1569
      self.hyp_fail = False
1570
      self.ghost = False
1571
      self.os_fail = False
1572
      self.oslist = {}
1573

    
1574
  def ExpandNames(self):
1575
    # This raises errors.OpPrereqError on its own:
1576
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1577

    
1578
    # Get instances in node group; this is unsafe and needs verification later
1579
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1580

    
1581
    self.needed_locks = {
1582
      locking.LEVEL_INSTANCE: inst_names,
1583
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1584
      locking.LEVEL_NODE: [],
1585
      }
1586

    
1587
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1588

    
1589
  def DeclareLocks(self, level):
1590
    if level == locking.LEVEL_NODE:
1591
      # Get members of node group; this is unsafe and needs verification later
1592
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1593

    
1594
      all_inst_info = self.cfg.GetAllInstancesInfo()
1595

    
1596
      # In Exec(), we warn about mirrored instances that have primary and
1597
      # secondary living in separate node groups. To fully verify that
1598
      # volumes for these instances are healthy, we will need to do an
1599
      # extra call to their secondaries. We ensure here those nodes will
1600
      # be locked.
1601
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1602
        # Important: access only the instances whose lock is owned
1603
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1604
          nodes.update(all_inst_info[inst].secondary_nodes)
1605

    
1606
      self.needed_locks[locking.LEVEL_NODE] = nodes
1607

    
1608
  def CheckPrereq(self):
1609
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1610
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1611

    
1612
    unlocked_nodes = \
1613
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1614

    
1615
    unlocked_instances = \
1616
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1617

    
1618
    if unlocked_nodes:
1619
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1620
                                 utils.CommaJoin(unlocked_nodes))
1621

    
1622
    if unlocked_instances:
1623
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1624
                                 utils.CommaJoin(unlocked_instances))
1625

    
1626
    self.all_node_info = self.cfg.GetAllNodesInfo()
1627
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1628

    
1629
    self.my_node_names = utils.NiceSort(group_nodes)
1630
    self.my_inst_names = utils.NiceSort(group_instances)
1631

    
1632
    self.my_node_info = dict((name, self.all_node_info[name])
1633
                             for name in self.my_node_names)
1634

    
1635
    self.my_inst_info = dict((name, self.all_inst_info[name])
1636
                             for name in self.my_inst_names)
1637

    
1638
    # We detect here the nodes that will need the extra RPC calls for verifying
1639
    # split LV volumes; they should be locked.
1640
    extra_lv_nodes = set()
1641

    
1642
    for inst in self.my_inst_info.values():
1643
      if inst.disk_template in constants.DTS_INT_MIRROR:
1644
        group = self.my_node_info[inst.primary_node].group
1645
        for nname in inst.secondary_nodes:
1646
          if self.all_node_info[nname].group != group:
1647
            extra_lv_nodes.add(nname)
1648

    
1649
    unlocked_lv_nodes = \
1650
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1651

    
1652
    if unlocked_lv_nodes:
1653
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1654
                                 utils.CommaJoin(unlocked_lv_nodes))
1655
    self.extra_lv_nodes = list(extra_lv_nodes)
1656

    
1657
  def _VerifyNode(self, ninfo, nresult):
1658
    """Perform some basic validation on data returned from a node.
1659

1660
      - check the result data structure is well formed and has all the
1661
        mandatory fields
1662
      - check ganeti version
1663

1664
    @type ninfo: L{objects.Node}
1665
    @param ninfo: the node to check
1666
    @param nresult: the results from the node
1667
    @rtype: boolean
1668
    @return: whether overall this call was successful (and we can expect
1669
         reasonable values in the response)
1670

1671
    """
1672
    node = ninfo.name
1673
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1674

    
1675
    # main result, nresult should be a non-empty dict
1676
    test = not nresult or not isinstance(nresult, dict)
1677
    _ErrorIf(test, self.ENODERPC, node,
1678
                  "unable to verify node: no data returned")
1679
    if test:
1680
      return False
1681

    
1682
    # compares ganeti version
1683
    local_version = constants.PROTOCOL_VERSION
1684
    remote_version = nresult.get("version", None)
1685
    test = not (remote_version and
1686
                isinstance(remote_version, (list, tuple)) and
1687
                len(remote_version) == 2)
1688
    _ErrorIf(test, self.ENODERPC, node,
1689
             "connection to node returned invalid data")
1690
    if test:
1691
      return False
1692

    
1693
    test = local_version != remote_version[0]
1694
    _ErrorIf(test, self.ENODEVERSION, node,
1695
             "incompatible protocol versions: master %s,"
1696
             " node %s", local_version, remote_version[0])
1697
    if test:
1698
      return False
1699

    
1700
    # node seems compatible, we can actually try to look into its results
1701

    
1702
    # full package version
1703
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1704
                  self.ENODEVERSION, node,
1705
                  "software version mismatch: master %s, node %s",
1706
                  constants.RELEASE_VERSION, remote_version[1],
1707
                  code=self.ETYPE_WARNING)
1708

    
1709
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1710
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1711
      for hv_name, hv_result in hyp_result.iteritems():
1712
        test = hv_result is not None
1713
        _ErrorIf(test, self.ENODEHV, node,
1714
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1715

    
1716
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1717
    if ninfo.vm_capable and isinstance(hvp_result, list):
1718
      for item, hv_name, hv_result in hvp_result:
1719
        _ErrorIf(True, self.ENODEHV, node,
1720
                 "hypervisor %s parameter verify failure (source %s): %s",
1721
                 hv_name, item, hv_result)
1722

    
1723
    test = nresult.get(constants.NV_NODESETUP,
1724
                       ["Missing NODESETUP results"])
1725
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1726
             "; ".join(test))
1727

    
1728
    return True
1729

    
1730
  def _VerifyNodeTime(self, ninfo, nresult,
1731
                      nvinfo_starttime, nvinfo_endtime):
1732
    """Check the node time.
1733

1734
    @type ninfo: L{objects.Node}
1735
    @param ninfo: the node to check
1736
    @param nresult: the remote results for the node
1737
    @param nvinfo_starttime: the start time of the RPC call
1738
    @param nvinfo_endtime: the end time of the RPC call
1739

1740
    """
1741
    node = ninfo.name
1742
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1743

    
1744
    ntime = nresult.get(constants.NV_TIME, None)
1745
    try:
1746
      ntime_merged = utils.MergeTime(ntime)
1747
    except (ValueError, TypeError):
1748
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1749
      return
1750

    
1751
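    # the node's reported time must fall within the RPC call window, extended
    # by the allowed clock skew on both sides; otherwise report the divergence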
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1752
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1753
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1754
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1755
    else:
1756
      ntime_diff = None
1757

    
1758
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1759
             "Node time diverges by at least %s from master node time",
1760
             ntime_diff)
1761

    
1762
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1763
    """Check the node LVM results.
1764

1765
    @type ninfo: L{objects.Node}
1766
    @param ninfo: the node to check
1767
    @param nresult: the remote results for the node
1768
    @param vg_name: the configured VG name
1769

1770
    """
1771
    if vg_name is None:
1772
      return
1773

    
1774
    node = ninfo.name
1775
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1776

    
1777
    # checks vg existence and size > 20G
1778
    vglist = nresult.get(constants.NV_VGLIST, None)
1779
    test = not vglist
1780
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1781
    if not test:
1782
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1783
                                            constants.MIN_VG_SIZE)
1784
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1785

    
1786
    # check pv names
1787
    pvlist = nresult.get(constants.NV_PVLIST, None)
1788
    test = pvlist is None
1789
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1790
    if not test:
1791
      # check that ':' is not present in PV names, since it's a
1792
      # special character for lvcreate (denotes the range of PEs to
1793
      # use on the PV)
1794
      for _, pvname, owner_vg in pvlist:
1795
        test = ":" in pvname
1796
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1797
                 " '%s' of VG '%s'", pvname, owner_vg)
1798

    
1799
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1800
    """Check the node bridges.
1801

1802
    @type ninfo: L{objects.Node}
1803
    @param ninfo: the node to check
1804
    @param nresult: the remote results for the node
1805
    @param bridges: the expected list of bridges
1806

1807
    """
1808
    if not bridges:
1809
      return
1810

    
1811
    node = ninfo.name
1812
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1813

    
1814
    missing = nresult.get(constants.NV_BRIDGES, None)
1815
    test = not isinstance(missing, list)
1816
    _ErrorIf(test, self.ENODENET, node,
1817
             "did not return valid bridge information")
1818
    if not test:
1819
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1820
               utils.CommaJoin(sorted(missing)))
1821

    
1822
  def _VerifyNodeNetwork(self, ninfo, nresult):
1823
    """Check the node network connectivity results.
1824

1825
    @type ninfo: L{objects.Node}
1826
    @param ninfo: the node to check
1827
    @param nresult: the remote results for the node
1828

1829
    """
1830
    node = ninfo.name
1831
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1832

    
1833
    test = constants.NV_NODELIST not in nresult
1834
    _ErrorIf(test, self.ENODESSH, node,
1835
             "node hasn't returned node ssh connectivity data")
1836
    if not test:
1837
      if nresult[constants.NV_NODELIST]:
1838
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1839
          _ErrorIf(True, self.ENODESSH, node,
1840
                   "ssh communication with node '%s': %s", a_node, a_msg)
1841

    
1842
    test = constants.NV_NODENETTEST not in nresult
1843
    _ErrorIf(test, self.ENODENET, node,
1844
             "node hasn't returned node tcp connectivity data")
1845
    if not test:
1846
      if nresult[constants.NV_NODENETTEST]:
1847
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1848
        for anode in nlist:
1849
          _ErrorIf(True, self.ENODENET, node,
1850
                   "tcp communication with node '%s': %s",
1851
                   anode, nresult[constants.NV_NODENETTEST][anode])
1852

    
1853
    test = constants.NV_MASTERIP not in nresult
1854
    _ErrorIf(test, self.ENODENET, node,
1855
             "node hasn't returned node master IP reachability data")
1856
    if not test:
1857
      if not nresult[constants.NV_MASTERIP]:
1858
        if node == self.master_node:
1859
          msg = "the master node cannot reach the master IP (not configured?)"
1860
        else:
1861
          msg = "cannot reach the master IP"
1862
        _ErrorIf(True, self.ENODENET, node, msg)
1863

    
1864
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1865
                      diskstatus):
1866
    """Verify an instance.
1867

1868
    This function checks to see if the required block devices are
1869
    available on the instance's node.
1870

1871
    """
1872
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1873
    node_current = instanceconfig.primary_node
1874

    
1875
    node_vol_should = {}
1876
    instanceconfig.MapLVsByNode(node_vol_should)
1877

    
1878
    for node in node_vol_should:
1879
      n_img = node_image[node]
1880
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1881
        # ignore missing volumes on offline or broken nodes
1882
        continue
1883
      for volume in node_vol_should[node]:
1884
        test = volume not in n_img.volumes
1885
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1886
                 "volume %s missing on node %s", volume, node)
1887

    
1888
    if instanceconfig.admin_up:
1889
      pri_img = node_image[node_current]
1890
      test = instance not in pri_img.instances and not pri_img.offline
1891
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1892
               "instance not running on its primary node %s",
1893
               node_current)
1894

    
1895
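    # diskstatus maps each node name to a list of (success, status) pairs, one
    # per disk index; flatten it into (node, success, status, idx) tuples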
    diskdata = [(nname, success, status, idx)
1896
                for (nname, disks) in diskstatus.items()
1897
                for idx, (success, status) in enumerate(disks)]
1898

    
1899
    for nname, success, bdev_status, idx in diskdata:
1900
      # the 'ghost node' construction in Exec() ensures that we have a
1901
      # node here
1902
      snode = node_image[nname]
1903
      bad_snode = snode.ghost or snode.offline
1904
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1905
               self.EINSTANCEFAULTYDISK, instance,
1906
               "couldn't retrieve status for disk/%s on %s: %s",
1907
               idx, nname, bdev_status)
1908
      _ErrorIf((instanceconfig.admin_up and success and
1909
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1910
               self.EINSTANCEFAULTYDISK, instance,
1911
               "disk/%s on %s is faulty", idx, nname)
1912

    
1913
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1914
    """Verify if there are any unknown volumes in the cluster.
1915

1916
    The .os, .swap and backup volumes are ignored. All other volumes are
1917
    reported as unknown.
1918

1919
    @type reserved: L{ganeti.utils.FieldSet}
1920
    @param reserved: a FieldSet of reserved volume names
1921

1922
    """
1923
    for node, n_img in node_image.items():
1924
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1925
        # skip non-healthy nodes
1926
        continue
1927
      for volume in n_img.volumes:
1928
        test = ((node not in node_vol_should or
1929
                volume not in node_vol_should[node]) and
1930
                not reserved.Matches(volume))
1931
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1932
                      "volume %s is unknown", volume)
1933

    
1934
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1935
    """Verify N+1 Memory Resilience.
1936

1937
    Check that if one single node dies we can still start all the
1938
    instances it was primary for.
1939

1940
    """
1941
    cluster_info = self.cfg.GetClusterInfo()
1942
    for node, n_img in node_image.items():
1943
      # This code checks that every node which is now listed as
1944
      # secondary has enough memory to host all instances it is
1945
      # supposed to, should a single other node in the cluster fail.
1946
      # FIXME: not ready for failover to an arbitrary node
1947
      # FIXME: does not support file-backed instances
1948
      # WARNING: we currently take into account down instances as well
1949
      # as up ones, considering that even if they're down someone
1950
      # might want to start them even in the event of a node failure.
1951
      if n_img.offline:
1952
        # we're skipping offline nodes from the N+1 warning, since
1953
        # most likely we don't have good memory information from them;
1954
        # we already list instances living on such nodes, and that's
1955
        # enough warning
1956
        continue
1957
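      # n_img.sbp maps each primary node to the instances for which this node
      # is secondary; if that primary fails, they would fail over onto this one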
      for prinode, instances in n_img.sbp.items():
1958
        needed_mem = 0
1959
        for instance in instances:
1960
          bep = cluster_info.FillBE(instance_cfg[instance])
1961
          if bep[constants.BE_AUTO_BALANCE]:
1962
            needed_mem += bep[constants.BE_MEMORY]
1963
        test = n_img.mfree < needed_mem
1964
        self._ErrorIf(test, self.ENODEN1, node,
1965
                      "not enough memory to accomodate instance failovers"
1966
                      " should node %s fail (%dMiB needed, %dMiB available)",
1967
                      prinode, needed_mem, n_img.mfree)
1968

    
1969
  @classmethod
1970
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1971
                   (files_all, files_all_opt, files_mc, files_vm)):
1972
    """Verifies file checksums collected from all nodes.
1973

1974
    @param errorif: Callback for reporting errors
1975
    @param nodeinfo: List of L{objects.Node} objects
1976
    @param master_node: Name of master node
1977
    @param all_nvinfo: RPC results
1978

1979
    """
1980
    node_names = frozenset(node.name for node in nodeinfo)
1981

    
1982
    assert master_node in node_names
1983
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1984
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1985
           "Found file listed in more than one file list"
1986

    
1987
    # Define functions determining which nodes to consider for a file
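    # (a callback of None means "consider all nodes"; otherwise the callback
    # decides per node, e.g. master candidates only)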
1988
    file2nodefn = dict([(filename, fn)
1989
      for (files, fn) in [(files_all, None),
1990
                          (files_all_opt, None),
1991
                          (files_mc, lambda node: (node.master_candidate or
1992
                                                   node.name == master_node)),
1993
                          (files_vm, lambda node: node.vm_capable)]
1994
      for filename in files])
1995

    
1996
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1997

    
1998
    for node in nodeinfo:
1999
      nresult = all_nvinfo[node.name]
2000

    
2001
      if nresult.fail_msg or not nresult.payload:
2002
        node_files = None
2003
      else:
2004
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2005

    
2006
      test = not (node_files and isinstance(node_files, dict))
2007
      errorif(test, cls.ENODEFILECHECK, node.name,
2008
              "Node did not return file checksum data")
2009
      if test:
2010
        continue
2011

    
2012
      for (filename, checksum) in node_files.items():
2013
        # Check if the file should be considered for a node
2014
        fn = file2nodefn[filename]
2015
        if fn is None or fn(node):
2016
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
2017

    
2018
    for (filename, checksums) in fileinfo.items():
2019
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2020

    
2021
      # Nodes having the file
2022
      with_file = frozenset(node_name
2023
                            for nodes in fileinfo[filename].values()
2024
                            for node_name in nodes)
2025

    
2026
      # Nodes missing file
2027
      missing_file = node_names - with_file
2028

    
2029
      if filename in files_all_opt:
2030
        # All or no nodes
2031
        errorif(missing_file and missing_file != node_names,
2032
                cls.ECLUSTERFILECHECK, None,
2033
                "File %s is optional, but it must exist on all or no"
2034
                " nodes (not found on %s)",
2035
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2036
      else:
2037
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2038
                "File %s is missing from node(s) %s", filename,
2039
                utils.CommaJoin(utils.NiceSort(missing_file)))
2040

    
2041
      # See if there are multiple versions of the file
2042
      test = len(checksums) > 1
2043
      if test:
2044
        variants = ["variant %s on %s" %
2045
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2046
                    for (idx, (checksum, nodes)) in
2047
                      enumerate(sorted(checksums.items()))]
2048
      else:
2049
        variants = []
2050

    
2051
      errorif(test, cls.ECLUSTERFILECHECK, None,
2052
              "File %s found with %s different checksums (%s)",
2053
              filename, len(checksums), "; ".join(variants))
2054

    
2055
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2056
                      drbd_map):
2057
    """Verifies and the node DRBD status.
2058

2059
    @type ninfo: L{objects.Node}
2060
    @param ninfo: the node to check
2061
    @param nresult: the remote results for the node
2062
    @param instanceinfo: the dict of instances
2063
    @param drbd_helper: the configured DRBD usermode helper
2064
    @param drbd_map: the DRBD map as returned by
2065
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2066

2067
    """
2068
    node = ninfo.name
2069
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2070

    
2071
    if drbd_helper:
2072
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2073
      test = (helper_result is None)
2074
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2075
               "no drbd usermode helper returned")
2076
      if helper_result:
2077
        status, payload = helper_result
2078
        test = not status
2079
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2080
                 "drbd usermode helper check unsuccessful: %s", payload)
2081
        test = status and (payload != drbd_helper)
2082
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2083
                 "wrong drbd usermode helper: %s", payload)
2084

    
2085
    # compute the DRBD minors
2086
    node_drbd = {}
2087
    for minor, instance in drbd_map[node].items():
2088
      test = instance not in instanceinfo
2089
      _ErrorIf(test, self.ECLUSTERCFG, None,
2090
               "ghost instance '%s' in temporary DRBD map", instance)
2091
        # ghost instance should not be running, but otherwise we
2092
        # don't give double warnings (both ghost instance and
2093
        # unallocated minor in use)
2094
      if test:
2095
        node_drbd[minor] = (instance, False)
2096
      else:
2097
        instance = instanceinfo[instance]
2098
        node_drbd[minor] = (instance.name, instance.admin_up)
2099

    
2100
    # and now check them
2101
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2102
    test = not isinstance(used_minors, (tuple, list))
2103
    _ErrorIf(test, self.ENODEDRBD, node,
2104
             "cannot parse drbd status file: %s", str(used_minors))
2105
    if test:
2106
      # we cannot check drbd status
2107
      return
2108

    
2109
    for minor, (iname, must_exist) in node_drbd.items():
2110
      test = minor not in used_minors and must_exist
2111
      _ErrorIf(test, self.ENODEDRBD, node,
2112
               "drbd minor %d of instance %s is not active", minor, iname)
2113
    for minor in used_minors:
2114
      test = minor not in node_drbd
2115
      _ErrorIf(test, self.ENODEDRBD, node,
2116
               "unallocated drbd minor %d is in use", minor)
2117

    
2118
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2119
    """Builds the node OS structures.
2120

2121
    @type ninfo: L{objects.Node}
2122
    @param ninfo: the node to check
2123
    @param nresult: the remote results for the node
2124
    @param nimg: the node image object
2125

2126
    """
2127
    node = ninfo.name
2128
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2129

    
2130
    remote_os = nresult.get(constants.NV_OSLIST, None)
2131
    test = (not isinstance(remote_os, list) or
2132
            not compat.all(isinstance(v, list) and len(v) == 7
2133
                           for v in remote_os))
2134

    
2135
    _ErrorIf(test, self.ENODEOS, node,
2136
             "node hasn't returned valid OS data")
2137

    
2138
    nimg.os_fail = test
2139

    
2140
    if test:
2141
      return
2142

    
2143
    os_dict = {}
2144

    
2145
    for (name, os_path, status, diagnose,
2146
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2147

    
2148
      if name not in os_dict:
2149
        os_dict[name] = []
2150

    
2151
      # parameters is a list of lists instead of list of tuples due to
2152
      # JSON lacking a real tuple type, fix it:
2153
      parameters = [tuple(v) for v in parameters]
2154
      os_dict[name].append((os_path, status, diagnose,
2155
                            set(variants), set(parameters), set(api_ver)))
2156

    
2157
    nimg.oslist = os_dict
2158

    
2159
  def _VerifyNodeOS(self, ninfo, nimg, base):
2160
    """Verifies the node OS list.
2161

2162
    @type ninfo: L{objects.Node}
2163
    @param ninfo: the node to check
2164
    @param nimg: the node image object
2165
    @param base: the 'template' node we match against (e.g. from the master)
2166

2167
    """
2168
    node = ninfo.name
2169
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2170

    
2171
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2172

    
2173
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2174
    for os_name, os_data in nimg.oslist.items():
2175
      assert os_data, "Empty OS status for OS %s?!" % os_name
2176
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2177
      _ErrorIf(not f_status, self.ENODEOS, node,
2178
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2179
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2180
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2181
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2182
      # this will be caught in the backend too
2183
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
2184
               and not f_var, self.ENODEOS, node,
2185
               "OS %s with API at least %d does not declare any variant",
2186
               os_name, constants.OS_API_V15)
2187
      # comparisons with the 'base' image
2188
      test = os_name not in base.oslist
2189
      _ErrorIf(test, self.ENODEOS, node,
2190
               "Extra OS %s not present on reference node (%s)",
2191
               os_name, base.name)
2192
      if test:
2193
        continue
2194
      assert base.oslist[os_name], "Base node has empty OS status?"
2195
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2196
      if not b_status:
2197
        # base OS is invalid, skipping
2198
        continue
2199
      for kind, a, b in [("API version", f_api, b_api),
2200
                         ("variants list", f_var, b_var),
2201
                         ("parameters", beautify_params(f_param),
2202
                          beautify_params(b_param))]:
2203
        _ErrorIf(a != b, self.ENODEOS, node,
2204
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2205
                 kind, os_name, base.name,
2206
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2207

    
2208
    # check any missing OSes
2209
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2210
    _ErrorIf(missing, self.ENODEOS, node,
2211
             "OSes present on reference node %s but missing on this node: %s",
2212
             base.name, utils.CommaJoin(missing))
2213

    
2214
  def _VerifyOob(self, ninfo, nresult):
2215
    """Verifies out of band functionality of a node.
2216

2217
    @type ninfo: L{objects.Node}
2218
    @param ninfo: the node to check
2219
    @param nresult: the remote results for the node
2220

2221
    """
2222
    node = ninfo.name
2223
    # We just have to verify the paths on master and/or master candidates
2224
    # as the oob helper is invoked on the master
2225
    if ((ninfo.master_candidate or ninfo.master_capable) and
2226
        constants.NV_OOB_PATHS in nresult):
2227
      for path_result in nresult[constants.NV_OOB_PATHS]:
2228
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2229

    
2230
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2231
    """Verifies and updates the node volume data.
2232

2233
    This function will update a L{NodeImage}'s internal structures
2234
    with data from the remote call.
2235

2236
    @type ninfo: L{objects.Node}
2237
    @param ninfo: the node to check
2238
    @param nresult: the remote results for the node
2239
    @param nimg: the node image object
2240
    @param vg_name: the configured VG name
2241

2242
    """
2243
    node = ninfo.name
2244
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2245

    
2246
    nimg.lvm_fail = True
2247
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2248
    if vg_name is None:
2249
      pass
2250
    elif isinstance(lvdata, basestring):
2251
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2252
               utils.SafeEncode(lvdata))
2253
    elif not isinstance(lvdata, dict):
2254
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2255
    else:
2256
      nimg.volumes = lvdata
2257
      nimg.lvm_fail = False
2258

    
2259
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2260
    """Verifies and updates the node instance list.
2261

2262
    If the listing was successful, then updates this node's instance
2263
    list. Otherwise, it marks the RPC call as failed for the instance
2264
    list key.
2265

2266
    @type ninfo: L{objects.Node}
2267
    @param ninfo: the node to check
2268
    @param nresult: the remote results for the node
2269
    @param nimg: the node image object
2270

2271
    """
2272
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2273
    test = not isinstance(idata, list)
2274
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2275
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2276
    if test:
2277
      nimg.hyp_fail = True
2278
    else:
2279
      nimg.instances = idata
2280

    
2281
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2282
    """Verifies and computes a node information map
2283

2284
    @type ninfo: L{objects.Node}
2285
    @param ninfo: the node to check
2286
    @param nresult: the remote results for the node
2287
    @param nimg: the node image object
2288
    @param vg_name: the configured VG name
2289

2290
    """
2291
    node = ninfo.name
2292
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2293

    
2294
    # try to read free memory (from the hypervisor)
2295
    hv_info = nresult.get(constants.NV_HVINFO, None)
2296
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2297
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2298
    if not test:
2299
      try:
2300
        nimg.mfree = int(hv_info["memory_free"])
2301
      except (ValueError, TypeError):
2302
        _ErrorIf(True, self.ENODERPC, node,
2303
                 "node returned invalid nodeinfo, check hypervisor")
2304

    
2305
    # FIXME: devise a free space model for file based instances as well
2306
    if vg_name is not None:
2307
      test = (constants.NV_VGLIST not in nresult or
2308
              vg_name not in nresult[constants.NV_VGLIST])
2309
      _ErrorIf(test, self.ENODELVM, node,
2310
               "node didn't return data for the volume group '%s'"
2311
               " - it is either missing or broken", vg_name)
2312
      if not test:
2313
        try:
2314
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2315
        except (ValueError, TypeError):
2316
          _ErrorIf(True, self.ENODERPC, node,
2317
                   "node returned invalid LVM info, check LVM status")
2318

    
2319
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2320
    """Gets per-disk status information for all instances.
2321

2322
    @type nodelist: list of strings
2323
    @param nodelist: Node names
2324
    @type node_image: dict of (name, L{NodeImage})
2325
    @param node_image: NodeImage objects
2326
    @type instanceinfo: dict of (name, L{objects.Instance})
2327
    @param instanceinfo: Instance objects
2328
    @rtype: {instance: {node: [(success, payload)]}}
2329
    @return: a dictionary of per-instance dictionaries with nodes as
2330
        keys and disk information as values; the disk information is a
2331
        list of tuples (success, payload)
2332

2333
    """
2334
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2335

    
2336
    node_disks = {}
2337
    node_disks_devonly = {}
2338
    diskless_instances = set()
2339
    diskless = constants.DT_DISKLESS
2340

    
2341
    for nname in nodelist:
2342
      node_instances = list(itertools.chain(node_image[nname].pinst,
2343
                                            node_image[nname].sinst))
2344
      diskless_instances.update(inst for inst in node_instances
2345
                                if instanceinfo[inst].disk_template == diskless)
2346
      disks = [(inst, disk)
2347
               for inst in node_instances
2348
               for disk in instanceinfo[inst].disks]
2349

    
2350
      if not disks:
2351
        # No need to collect data
2352
        continue
2353

    
2354
      node_disks[nname] = disks
2355

    
2356
      # Creating copies as SetDiskID below will modify the objects and that can
2357
      # lead to incorrect data returned from nodes
2358
      devonly = [dev.Copy() for (_, dev) in disks]
2359

    
2360
      for dev in devonly:
2361
        self.cfg.SetDiskID(dev, nname)
2362

    
2363
      node_disks_devonly[nname] = devonly
2364

    
2365
    assert len(node_disks) == len(node_disks_devonly)
2366

    
2367
    # Collect data from all nodes with disks
2368
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2369
                                                          node_disks_devonly)
2370

    
2371
    assert len(result) == len(node_disks)
2372

    
2373
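    # instdisk: {instance: {node: [(success, payload), ...]}}, rebuilt below by
    # zipping the per-node results back with their (instance, disk) pairs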
    instdisk = {}
2374

    
2375
    for (nname, nres) in result.items():
2376
      disks = node_disks[nname]
2377

    
2378
      if nres.offline:
2379
        # No data from this node
2380
        data = len(disks) * [(False, "node offline")]
2381
      else:
2382
        msg = nres.fail_msg
2383
        _ErrorIf(msg, self.ENODERPC, nname,
2384
                 "while getting disk information: %s", msg)
2385
        if msg:
2386
          # No data from this node
2387
          data = len(disks) * [(False, msg)]
2388
        else:
2389
          data = []
2390
          for idx, i in enumerate(nres.payload):
2391
            if isinstance(i, (tuple, list)) and len(i) == 2:
2392
              data.append(i)
2393
            else:
2394
              logging.warning("Invalid result from node %s, entry %d: %s",
2395
                              nname, idx, i)
2396
              data.append((False, "Invalid result from the remote node"))
2397

    
2398
      for ((inst, _), status) in zip(disks, data):
2399
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2400

    
2401
    # Add empty entries for diskless instances.
2402
    for inst in diskless_instances:
2403
      assert inst not in instdisk
2404
      instdisk[inst] = {}
2405

    
2406
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2407
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2408
                      compat.all(isinstance(s, (tuple, list)) and
2409
                                 len(s) == 2 for s in statuses)
2410
                      for inst, nnames in instdisk.items()
2411
                      for nname, statuses in nnames.items())
2412
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2413

    
2414
    return instdisk
2415

    
2416
  def BuildHooksEnv(self):
2417
    """Build hooks env.
2418

2419
    Cluster-Verify hooks are only run in the post phase; their failure causes
2420
    their output to be logged in the verify output and the verification to fail.
2421

2422
    """
2423
    env = {
2424
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2425
      }
2426

    
2427
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2428
               for node in self.my_node_info.values())
2429

    
2430
    return env
2431

    
2432
  def BuildHooksNodes(self):
2433
    """Build hooks nodes.
2434

2435
    """
2436
    return ([], self.my_node_names)
2437

    
2438
  def Exec(self, feedback_fn):
2439
    """Verify integrity of the node group, performing various test on nodes.
2440

2441
    """
2442
    # This method has too many local variables. pylint: disable-msg=R0914
2443

    
2444
    if not self.my_node_names:
2445
      # empty node group
2446
      feedback_fn("* Empty node group, skipping verification")
2447
      return True
2448

    
2449
    self.bad = False
2450
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2451
    verbose = self.op.verbose
2452
    self._feedback_fn = feedback_fn
2453

    
2454
    vg_name = self.cfg.GetVGName()
2455
    drbd_helper = self.cfg.GetDRBDHelper()
2456
    cluster = self.cfg.GetClusterInfo()
2457
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2458
    hypervisors = cluster.enabled_hypervisors
2459
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2460

    
2461
    i_non_redundant = [] # Non redundant instances
2462
    i_non_a_balanced = [] # Non auto-balanced instances
2463
    n_offline = 0 # Count of offline nodes
2464
    n_drained = 0 # Count of nodes being drained
2465
    node_vol_should = {}
2466

    
2467
    # FIXME: verify OS list
2468

    
2469
    # File verification
2470
    filemap = _ComputeAncillaryFiles(cluster, False)
2471

    
2472
    # do local checksums
2473
    master_node = self.master_node = self.cfg.GetMasterNode()
2474
    master_ip = self.cfg.GetMasterIP()
2475

    
2476
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2477

    
2478
    # We will make nodes contact all nodes in their group, and one node from
2479
    # every other group.
2480
    # TODO: should it be a *random* node, different every time?
2481
    online_nodes = [node.name for node in node_data_list if not node.offline]
2482
    other_group_nodes = {}
2483

    
2484
    for name in sorted(self.all_node_info):
2485
      node = self.all_node_info[name]
2486
      if (node.group not in other_group_nodes
2487
          and node.group != self.group_uuid
2488
          and not node.offline):
2489
        other_group_nodes[node.group] = node.name
2490

    
2491
    node_verify_param = {
2492
      constants.NV_FILELIST:
2493
        utils.UniqueSequence(filename
2494
                             for files in filemap
2495
                             for filename in files),
2496
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2497
      constants.NV_HYPERVISOR: hypervisors,
2498
      constants.NV_HVPARAMS:
2499
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2500
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2501
                                 for node in node_data_list
2502
                                 if not node.offline],
2503
      constants.NV_INSTANCELIST: hypervisors,
2504
      constants.NV_VERSION: None,
2505
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2506
      constants.NV_NODESETUP: None,
2507
      constants.NV_TIME: None,
2508
      constants.NV_MASTERIP: (master_node, master_ip),
2509
      constants.NV_OSLIST: None,
2510
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2511
      }
2512

    
2513
    if vg_name is not None:
2514
      node_verify_param[constants.NV_VGLIST] = None
2515
      node_verify_param[constants.NV_LVLIST] = vg_name
2516
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2517
      node_verify_param[constants.NV_DRBDLIST] = None
2518

    
2519
    if drbd_helper:
2520
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2521

    
2522
    # bridge checks
2523
    # FIXME: this needs to be changed per node-group, not cluster-wide
2524
    bridges = set()
2525
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2526
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2527
      bridges.add(default_nicpp[constants.NIC_LINK])
2528
    for instance in self.my_inst_info.values():
2529
      for nic in instance.nics:
2530
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2531
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2532
          bridges.add(full_nic[constants.NIC_LINK])
2533

    
2534
    if bridges:
2535
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2536

    
2537
    # Build our expected cluster state
2538
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2539
                                                 name=node.name,
2540
                                                 vm_capable=node.vm_capable))
2541
                      for node in node_data_list)
2542

    
2543
    # Gather OOB paths
2544
    oob_paths = []
2545
    for node in self.all_node_info.values():
2546
      path = _SupportsOob(self.cfg, node)
2547
      if path and path not in oob_paths:
2548
        oob_paths.append(path)
2549

    
2550
    if oob_paths:
2551
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2552

    
2553
    for instance in self.my_inst_names:
2554
      inst_config = self.my_inst_info[instance]
2555

    
2556
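      # an instance may reference nodes outside this group, or even nodes that
      # are unknown to the configuration ("ghost" nodes); create placeholder
      # images for them so the checks below can look them up safely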
      for nname in inst_config.all_nodes:
2557
        if nname not in node_image:
2558
          gnode = self.NodeImage(name=nname)
2559
          gnode.ghost = (nname not in self.all_node_info)
2560
          node_image[nname] = gnode
2561

    
2562
      inst_config.MapLVsByNode(node_vol_should)
2563

    
2564
      pnode = inst_config.primary_node
2565
      node_image[pnode].pinst.append(instance)
2566

    
2567
      for snode in inst_config.secondary_nodes:
2568
        nimg = node_image[snode]
2569
        nimg.sinst.append(instance)
2570
        if pnode not in nimg.sbp:
2571
          nimg.sbp[pnode] = []
2572
        nimg.sbp[pnode].append(instance)
2573

    
2574
    # At this point, we have the in-memory data structures complete,
2575
    # except for the runtime information, which we'll gather next
2576

    
2577
    # Due to the way our RPC system works, exact response times cannot be
2578
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2579
    # time before and after executing the request, we can at least have a time
2580
    # window.
2581
    nvinfo_starttime = time.time()
2582
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2583
                                           node_verify_param,
2584
                                           self.cfg.GetClusterName())
2585
    nvinfo_endtime = time.time()
2586

    
2587
    if self.extra_lv_nodes and vg_name is not None:
2588
      extra_lv_nvinfo = \
2589
          self.rpc.call_node_verify(self.extra_lv_nodes,
2590
                                    {constants.NV_LVLIST: vg_name},
2591
                                    self.cfg.GetClusterName())
2592
    else:
2593
      extra_lv_nvinfo = {}
2594

    
2595
    all_drbd_map = self.cfg.ComputeDRBDMap()
2596

    
2597
    feedback_fn("* Gathering disk information (%s nodes)" %
2598
                len(self.my_node_names))
2599
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2600
                                     self.my_inst_info)
2601

    
2602
    feedback_fn("* Verifying configuration file consistency")
2603

    
2604
    # If not all nodes are being checked, we need to make sure the master node
2605
    # and a non-checked vm_capable node are in the list.
2606
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2607
    if absent_nodes:
2608
      vf_nvinfo = all_nvinfo.copy()
2609
      vf_node_info = list(self.my_node_info.values())
2610
      additional_nodes = []
2611
      if master_node not in self.my_node_info:
2612
        additional_nodes.append(master_node)
2613
        vf_node_info.append(self.all_node_info[master_node])
2614
      # Add the first vm_capable node we find which is not included
2615
      for node in absent_nodes:
2616
        nodeinfo = self.all_node_info[node]
2617
        if nodeinfo.vm_capable and not nodeinfo.offline:
2618
          additional_nodes.append(node)
2619
          vf_node_info.append(self.all_node_info[node])
2620
          break
2621
      key = constants.NV_FILELIST
2622
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2623
                                                 {key: node_verify_param[key]},
2624
                                                 self.cfg.GetClusterName()))
2625
    else:
2626
      vf_nvinfo = all_nvinfo
2627
      vf_node_info = self.my_node_info.values()
2628

    
2629
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2630

    
2631
    feedback_fn("* Verifying node status")
2632

    
2633
    refos_img = None
2634

    
2635
    for node_i in node_data_list:
2636
      node = node_i.name
2637
      nimg = node_image[node]
2638

    
2639
      if node_i.offline:
2640
        if verbose:
2641
          feedback_fn("* Skipping offline node %s" % (node,))
2642
        n_offline += 1
2643
        continue
2644

    
2645
      if node == master_node:
2646
        ntype = "master"
2647
      elif node_i.master_candidate:
2648
        ntype = "master candidate"
2649
      elif node_i.drained:
2650
        ntype = "drained"
2651
        n_drained += 1
2652
      else:
2653
        ntype = "regular"
2654
      if verbose:
2655
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2656

    
2657
      msg = all_nvinfo[node].fail_msg
2658
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2659
      if msg:
2660
        nimg.rpc_fail = True
2661
        continue
2662

    
2663
      nresult = all_nvinfo[node].payload
2664

    
2665
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2666
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2667
      self._VerifyNodeNetwork(node_i, nresult)
2668
      self._VerifyOob(node_i, nresult)
2669

    
2670
      if nimg.vm_capable:
2671
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2672
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2673
                             all_drbd_map)
2674

    
2675
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2676
        self._UpdateNodeInstances(node_i, nresult, nimg)
2677
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2678
        self._UpdateNodeOS(node_i, nresult, nimg)
2679

    
2680
        if not nimg.os_fail:
2681
          if refos_img is None:
2682
            refos_img = nimg
2683
          self._VerifyNodeOS(node_i, nimg, refos_img)
2684
        self._VerifyNodeBridges(node_i, nresult, bridges)
2685

    
2686
        # Check whether all running instances are primary for the node. (This
2687
        # can no longer be done from _VerifyInstance below, since some of the
2688
        # wrong instances could be from other node groups.)
2689
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2690

    
2691
        for inst in non_primary_inst:
2692
          test = inst in self.all_inst_info
2693
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2694
                   "instance should not run on node %s", node_i.name)
2695
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2696
                   "node is running unknown instance %s", inst)
2697

    
2698
    for node, result in extra_lv_nvinfo.items():
2699
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2700
                              node_image[node], vg_name)
2701

    
2702
    feedback_fn("* Verifying instance status")
2703
    for instance in self.my_inst_names:
2704
      if verbose:
2705
        feedback_fn("* Verifying instance %s" % instance)
2706
      inst_config = self.my_inst_info[instance]
2707
      self._VerifyInstance(instance, inst_config, node_image,
2708
                           instdisk[instance])
2709
      inst_nodes_offline = []
2710

    
2711
      pnode = inst_config.primary_node
2712
      pnode_img = node_image[pnode]
2713
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2714
               self.ENODERPC, pnode, "instance %s, connection to"
2715
               " primary node failed", instance)
2716

    
2717
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2718
               self.EINSTANCEBADNODE, instance,
2719
               "instance is marked as running and lives on offline node %s",
2720
               inst_config.primary_node)
2721

    
2722
      # If the instance is non-redundant we cannot survive losing its primary
2723
      # node, so we are not N+1 compliant. On the other hand we have no disk
2724
      # templates with more than one secondary so that situation is not well
2725
      # supported either.
2726
      # FIXME: does not support file-backed instances
2727
      if not inst_config.secondary_nodes:
2728
        i_non_redundant.append(instance)
2729

    
2730
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2731
               instance, "instance has multiple secondary nodes: %s",
2732
               utils.CommaJoin(inst_config.secondary_nodes),
2733
               code=self.ETYPE_WARNING)
2734

    
2735
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2736
        pnode = inst_config.primary_node
2737
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2738
        instance_groups = {}
2739

    
2740
        for node in instance_nodes:
2741
          instance_groups.setdefault(self.all_node_info[node].group,
2742
                                     []).append(node)
2743

    
2744
        pretty_list = [
2745
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2746
          # Sort so that we always list the primary node first.
2747
          for group, nodes in sorted(instance_groups.items(),
2748
                                     key=lambda (_, nodes): pnode in nodes,
2749
                                     reverse=True)]
2750

    
2751
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2752
                      instance, "instance has primary and secondary nodes in"
2753
                      " different groups: %s", utils.CommaJoin(pretty_list),
2754
                      code=self.ETYPE_WARNING)
2755

    
2756
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2757
        i_non_a_balanced.append(instance)
2758

    
2759
      for snode in inst_config.secondary_nodes:
2760
        s_img = node_image[snode]
2761
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2762
                 "instance %s, connection to secondary node failed", instance)
2763

    
2764
        if s_img.offline:
2765
          inst_nodes_offline.append(snode)
2766

    
2767
      # warn that the instance lives on offline nodes
2768
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2769
               "instance has offline secondary node(s) %s",
2770
               utils.CommaJoin(inst_nodes_offline))
2771
      # ... or ghost/non-vm_capable nodes
2772
      for node in inst_config.all_nodes:
2773
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2774
                 "instance lives on ghost node %s", node)
2775
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2776
                 instance, "instance lives on non-vm_capable node %s", node)
2777

    
2778
    feedback_fn("* Verifying orphan volumes")
2779
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2780

    
2781
    # We will get spurious "unknown volume" warnings if any node of this group
2782
    # is secondary for an instance whose primary is in another group. To avoid
2783
    # them, we find these instances and add their volumes to node_vol_should.
2784
    for inst in self.all_inst_info.values():
2785
      for secondary in inst.secondary_nodes:
2786
        if (secondary in self.my_node_info
2787
            and inst.name not in self.my_inst_info):
2788
          inst.MapLVsByNode(node_vol_should)
2789
          break
2790

    
2791
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2792

    
2793
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2794
      feedback_fn("* Verifying N+1 Memory redundancy")
2795
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2796

    
2797
    feedback_fn("* Other Notes")
2798
    if i_non_redundant:
2799
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2800
                  % len(i_non_redundant))
2801

    
2802
    if i_non_a_balanced:
2803
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2804
                  % len(i_non_a_balanced))
2805

    
2806
    if n_offline:
2807
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2808

    
2809
    if n_drained:
2810
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2811

    
2812
    return not self.bad
2813

    
2814
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2815
    """Analyze the post-hooks' result
2816

2817
    This method analyses the hook result, handles it, and sends some
2818
    nicely-formatted feedback back to the user.
2819

2820
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2821
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2822
    @param hooks_results: the results of the multi-node hooks rpc call
2823
    @param feedback_fn: function used to send feedback back to the caller
2824
    @param lu_result: previous Exec result
2825
    @return: the new Exec result, based on the previous result
2826
        and hook results
2827

2828
    """
2829
    # We only really run POST phase hooks, only for non-empty groups,
2830
    # and are only interested in their results
2831
    if not self.my_node_names:
2832
      # empty node group
2833
      pass
2834
    elif phase == constants.HOOKS_PHASE_POST:
2835
      # Used to change hooks' output to proper indentation
2836
      feedback_fn("* Hooks Results")
2837
      assert hooks_results, "invalid result from hooks"
2838

    
2839
      for node_name in hooks_results:
2840
        res = hooks_results[node_name]
2841
        msg = res.fail_msg
2842
        test = msg and not res.offline
2843
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2844
                      "Communication failure in hooks execution: %s", msg)
2845
        if res.offline or msg:
2846
          # No need to investigate payload if node is offline or gave an error.
2847
          # manually override lu_result here, as _ErrorIf only
2848
          # overrides self.bad
2849
          lu_result = 1
2850
          continue
2851
        for script, hkr, output in res.payload:
2852
          test = hkr == constants.HKR_FAIL
2853
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2854
                        "Script %s failed, output:", script)
2855
          if test:
2856
            output = self._HOOKS_INDENT_RE.sub("      ", output)
2857
            feedback_fn("%s" % output)
2858
            lu_result = 0
2859

    
2860
    return lu_result
2861

    
2862
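# Illustrative, standalone sketch (not part of Ganeti) of the bookkeeping in
# HooksCallBack above: per-node hook results are scanned, failed scripts are
# reported and their output is re-indented before being passed to
# feedback_fn. The _example_* name and the simplified (script, ok, output)
# tuples are hypothetical stand-ins for the real RPC result objects; the
# module-level "re" import is reused.
def _example_summarize_hook_results(hooks_results, feedback_fn):
  """Report failed hook scripts from a {node: [(script, ok, output)]} map."""
  indent_re = re.compile(r"^", re.M)
  failed = 0
  for node_name, scripts in hooks_results.items():
    for (script, ok, output) in scripts:
      if not ok:
        failed += 1
        feedback_fn("Script %s failed on node %s, output:" %
                    (script, node_name))
        feedback_fn(indent_re.sub("      ", output))
  return failed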

    
2863
class LUClusterVerifyDisks(NoHooksLU):
2864
  """Verifies the cluster disks status.
2865

2866
  """
2867
  REQ_BGL = False
2868

    
2869
  def ExpandNames(self):
2870
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2871
    self.needed_locks = {
2872
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
2873
      }
2874

    
2875
  def Exec(self, feedback_fn):
2876
    group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)
2877

    
2878
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2879
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2880
                           for group in group_names])
2881

    
2882
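# A minimal, standalone sketch of the fan-out performed by
# LUClusterVerifyDisks.Exec above: one single-opcode job is built per node
# group. The real code wraps opcodes.OpGroupVerifyDisks instances in
# ResultWithJobs; the plain tuples used here are hypothetical placeholders.
def _example_one_job_per_group(group_names):
  """Build a list of single-opcode jobs, one per node group."""
  return [[("OpGroupVerifyDisks", group)] for group in group_names]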

    
2883
class LUGroupVerifyDisks(NoHooksLU):
2884
  """Verifies the status of all disks in a node group.
2885

2886
  """
2887
  REQ_BGL = False
2888

    
2889
  def ExpandNames(self):
2890
    # Raises errors.OpPrereqError on its own if group can't be found
2891
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2892

    
2893
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2894
    self.needed_locks = {
2895
      locking.LEVEL_INSTANCE: [],
2896
      locking.LEVEL_NODEGROUP: [],
2897
      locking.LEVEL_NODE: [],
2898
      }
2899

    
2900
  def DeclareLocks(self, level):
2901
    if level == locking.LEVEL_INSTANCE:
2902
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
2903

    
2904
      # Lock instances optimistically, needs verification once node and group
2905
      # locks have been acquired
2906
      self.needed_locks[locking.LEVEL_INSTANCE] = \
2907
        self.cfg.GetNodeGroupInstances(self.group_uuid)
2908

    
2909
    elif level == locking.LEVEL_NODEGROUP:
2910
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2911

    
2912
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
2913
        set([self.group_uuid] +
2914
            # Lock all groups used by instances optimistically; this requires
2915
            # going via the node before it's locked, requiring verification
2916
            # later on
2917
            [group_uuid
2918
             for instance_name in
2919
               self.glm.list_owned(locking.LEVEL_INSTANCE)
2920
             for group_uuid in
2921
               self.cfg.GetInstanceNodeGroups(instance_name)])
2922

    
2923
    elif level == locking.LEVEL_NODE:
2924
      # This will only lock the nodes in the group to be verified which contain
2925
      # actual instances
2926
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2927
      self._LockInstancesNodes()
2928

    
2929
      # Lock all nodes in group to be verified
2930
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
2931
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2932
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2933

    
2934
  def CheckPrereq(self):
2935
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
2936
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
2937
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
2938

    
2939
    assert self.group_uuid in owned_groups
2940

    
2941
    # Check if locked instances are still correct
2942
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2943
    if owned_instances != wanted_instances:
2944
      raise errors.OpPrereqError("Instances in node group %s changed since"
2945
                                 " locks were acquired, wanted %s, have %s;"
2946
                                 " retry the operation" %
2947
                                 (self.op.group_name,
2948
                                  utils.CommaJoin(wanted_instances),
2949
                                  utils.CommaJoin(owned_instances)),
2950
                                 errors.ECODE_STATE)
2951

    
2952
    # Get instance information
2953
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
2954
                          for name in owned_instances)
2955

    
2956
    # Check if node groups for locked instances are still correct
2957
    for (instance_name, inst) in self.instances.items():
2958
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
2959
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
2960
      assert owned_nodes.issuperset(inst.all_nodes), \
2961
        "Instance %s's nodes changed while we kept the lock" % instance_name
2962

    
2963
      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
2964
      if not owned_groups.issuperset(inst_groups):
2965
        raise errors.OpPrereqError("Instance %s's node groups changed since"
2966
                                   " locks were acquired, current groups are"
2967
                                   " are '%s', owning groups '%s'; retry the"
2968
                                   " operation" %
2969
                                   (instance_name,
2970
                                    utils.CommaJoin(inst_groups),
2971
                                    utils.CommaJoin(owned_groups)),
2972
                                   errors.ECODE_STATE)
2973

    
2974
  def Exec(self, feedback_fn):
2975
    """Verify integrity of cluster disks.
2976

2977
    @rtype: tuple of three items
2978
    @return: a tuple of (dict of node-to-node_error, list of instances
2979
        which need activate-disks, dict of instance: (node, volume) for
2980
        missing volumes)
2981

2982
    """
2983
    res_nodes = {}
2984
    res_instances = set()
2985
    res_missing = {}
2986

    
2987
    nv_dict = _MapInstanceDisksToNodes([inst
2988
                                        for inst in self.instances.values()
2989
                                        if inst.admin_up])
2990

    
2991
    if nv_dict:
2992
      nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
2993
                             set(self.cfg.GetVmCapableNodeList()))
2994

    
2995
      node_lvs = self.rpc.call_lv_list(nodes, [])
2996

    
2997
      for (node, node_res) in node_lvs.items():
2998
        if node_res.offline:
2999
          continue
3000

    
3001
        msg = node_res.fail_msg
3002
        if msg:
3003
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3004
          res_nodes[node] = msg
3005
          continue
3006

    
3007
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3008
          inst = nv_dict.pop((node, lv_name), None)
3009
          if not (lv_online or inst is None):
3010
            res_instances.add(inst)
3011

    
3012
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3013
      # better
3014
      for key, inst in nv_dict.iteritems():
3015
        res_missing.setdefault(inst, []).append(key)
3016

    
3017
    return (res_nodes, list(res_instances), res_missing)
3018

    
3019
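# Standalone sketch (not Ganeti code) of the bookkeeping in
# LUGroupVerifyDisks.Exec above: every LV reported by a node is popped from
# the expected (node, lv_name) map, reported-but-offline LVs flag their
# instance as needing activation, and whatever is left over at the end is
# missing. The plain instance-name strings are hypothetical stand-ins for
# instance objects.
def _example_find_missing_lvs(expected, reported):
  """Split expected LVs into found-but-offline and missing ones.

  expected: dict of (node, lv_name) -> instance name
  reported: dict of node -> dict of lv_name -> lv_online flag

  """
  need_activation = set()
  for node, lvs in reported.items():
    for lv_name, lv_online in lvs.items():
      inst = expected.pop((node, lv_name), None)
      if inst is not None and not lv_online:
        need_activation.add(inst)
  missing = {}
  for key, inst in expected.items():
    missing.setdefault(inst, []).append(key)
  return (sorted(need_activation), missing)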

    
3020
class LUClusterRepairDiskSizes(NoHooksLU):
3021
  """Verifies the cluster disks sizes.
3022

3023
  """
3024
  REQ_BGL = False
3025

    
3026
  def ExpandNames(self):
3027
    if self.op.instances:
3028
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3029
      self.needed_locks = {
3030
        locking.LEVEL_NODE: [],
3031
        locking.LEVEL_INSTANCE: self.wanted_names,
3032
        }
3033
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3034
    else:
3035
      self.wanted_names = None
3036
      self.needed_locks = {
3037
        locking.LEVEL_NODE: locking.ALL_SET,
3038
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3039
        }
3040
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
3041

    
3042
  def DeclareLocks(self, level):
3043
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3044
      self._LockInstancesNodes(primary_only=True)
3045

    
3046
  def CheckPrereq(self):
3047
    """Check prerequisites.
3048

3049
    This only checks the optional instance list against the existing names.
3050

3051
    """
3052
    if self.wanted_names is None:
3053
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
3054

    
3055
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
3056
                             in self.wanted_names]
3057

    
3058
  def _EnsureChildSizes(self, disk):
3059
    """Ensure children of the disk have the needed disk size.
3060

3061
    This is valid mainly for DRBD8 and fixes an issue where the
3062
    children have smaller disk size.
3063

3064
    @param disk: an L{ganeti.objects.Disk} object
3065

3066
    """
3067
    if disk.dev_type == constants.LD_DRBD8:
3068
      assert disk.children, "Empty children for DRBD8?"
3069
      fchild = disk.children[0]
3070
      mismatch = fchild.size < disk.size
3071
      if mismatch:
3072
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3073
                     fchild.size, disk.size)
3074
        fchild.size = disk.size
3075

    
3076
      # and we recurse on this child only, not on the metadev
3077
      return self._EnsureChildSizes(fchild) or mismatch
3078
    else:
3079
      return False
3080

    
3081
  def Exec(self, feedback_fn):
3082
    """Verify the size of cluster disks.
3083

3084
    """
3085
    # TODO: check child disks too
3086
    # TODO: check differences in size between primary/secondary nodes
3087
    per_node_disks = {}
3088
    for instance in self.wanted_instances:
3089
      pnode = instance.primary_node
3090
      if pnode not in per_node_disks:
3091
        per_node_disks[pnode] = []
3092
      for idx, disk in enumerate(instance.disks):
3093
        per_node_disks[pnode].append((instance, idx, disk))
3094

    
3095
    changed = []
3096
    for node, dskl in per_node_disks.items():
3097
      newl = [v[2].Copy() for v in dskl]
3098
      for dsk in newl:
3099
        self.cfg.SetDiskID(dsk, node)
3100
      result = self.rpc.call_blockdev_getsize(node, newl)
3101
      if result.fail_msg:
3102
        self.LogWarning("Failure in blockdev_getsize call to node"
3103
                        " %s, ignoring", node)
3104
        continue
3105
      if len(result.payload) != len(dskl):
3106
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3107
                        " result.payload=%s", node, len(dskl), result.payload)
3108
        self.LogWarning("Invalid result from node %s, ignoring node results",
3109
                        node)
3110
        continue
3111
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3112
        if size is None:
3113
          self.LogWarning("Disk %d of instance %s did not return size"
3114
                          " information, ignoring", idx, instance.name)
3115
          continue
3116
        if not isinstance(size, (int, long)):
3117
          self.LogWarning("Disk %d of instance %s did not return valid"
3118
                          " size information, ignoring", idx, instance.name)
3119
          continue
3120
        size = size >> 20
3121
        if size != disk.size:
3122
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3123
                       " correcting: recorded %d, actual %d", idx,
3124
                       instance.name, disk.size, size)
3125
          disk.size = size
3126
          self.cfg.Update(instance, feedback_fn)
3127
          changed.append((instance.name, idx, size))
3128
        if self._EnsureChildSizes(disk):
3129
          self.cfg.Update(instance, feedback_fn)
3130
          changed.append((instance.name, idx, disk.size))
3131
    return changed
3132

    
3133
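# Standalone sketch of the size comparison done in
# LUClusterRepairDiskSizes.Exec above: node RPCs report sizes in bytes while
# the configuration stores MiB, hence the ">> 20" shift before comparing.
# The function name and the (index, recorded, actual) tuples are
# hypothetical; the real code updates objects.Disk entries in place.
def _example_find_size_mismatches(recorded_mib, reported_bytes):
  """Return [(idx, recorded_mib, actual_mib)] for disks whose size differs."""
  mismatches = []
  for idx, (recorded, reported) in enumerate(zip(recorded_mib,
                                                 reported_bytes)):
    if reported is None or not isinstance(reported, (int, long)):
      continue  # the node did not return usable data for this disk
    actual = reported >> 20  # bytes -> MiB
    if actual != recorded:
      mismatches.append((idx, recorded, actual))
  return mismatches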

    
3134
class LUClusterRename(LogicalUnit):
3135
  """Rename the cluster.
3136

3137
  """
3138
  HPATH = "cluster-rename"
3139
  HTYPE = constants.HTYPE_CLUSTER
3140

    
3141
  def BuildHooksEnv(self):
3142
    """Build hooks env.
3143

3144
    """
3145
    return {
3146
      "OP_TARGET": self.cfg.GetClusterName(),
3147
      "NEW_NAME": self.op.name,
3148
      }
3149

    
3150
  def BuildHooksNodes(self):
3151
    """Build hooks nodes.
3152

3153
    """
3154
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3155

    
3156
  def CheckPrereq(self):
3157
    """Verify that the passed name is a valid one.
3158

3159
    """
3160
    hostname = netutils.GetHostname(name=self.op.name,
3161
                                    family=self.cfg.GetPrimaryIPFamily())
3162

    
3163
    new_name = hostname.name
3164
    self.ip = new_ip = hostname.ip
3165
    old_name = self.cfg.GetClusterName()
3166
    old_ip = self.cfg.GetMasterIP()
3167
    if new_name == old_name and new_ip == old_ip:
3168
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3169
                                 " cluster has changed",
3170
                                 errors.ECODE_INVAL)
3171
    if new_ip != old_ip:
3172
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3173
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3174
                                   " reachable on the network" %
3175
                                   new_ip, errors.ECODE_NOTUNIQUE)
3176

    
3177
    self.op.name = new_name
3178

    
3179
  def Exec(self, feedback_fn):
3180
    """Rename the cluster.
3181

3182
    """
3183
    clustername = self.op.name
3184
    ip = self.ip
3185

    
3186
    # shutdown the master IP
3187
    master = self.cfg.GetMasterNode()
3188
    result = self.rpc.call_node_stop_master(master, False)
3189
    result.Raise("Could not disable the master role")
3190

    
3191
    try:
3192
      cluster = self.cfg.GetClusterInfo()
3193
      cluster.cluster_name = clustername
3194
      cluster.master_ip = ip
3195
      self.cfg.Update(cluster, feedback_fn)
3196

    
3197
      # update the known hosts file
3198
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3199
      node_list = self.cfg.GetOnlineNodeList()
3200
      try:
3201
        node_list.remove(master)
3202
      except ValueError:
3203
        pass
3204
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3205
    finally:
3206
      result = self.rpc.call_node_start_master(master, False, False)
3207
      msg = result.fail_msg
3208
      if msg:
3209
        self.LogWarning("Could not re-enable the master role on"
3210
                        " the master, please restart manually: %s", msg)
3211

    
3212
    return clustername
3213

    
3214
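# A simplified, standalone illustration of the control flow in
# LUClusterRename.Exec above: the master role is stopped, the configuration
# update runs inside try/finally, and the master role is restarted even if
# the update fails. stop_fn/update_fn/start_fn/warn_fn are hypothetical
# callables, not Ganeti APIs; start_fn is assumed to return an error message
# or None, mirroring the fail_msg pattern.
def _example_rename_with_master_restart(stop_fn, update_fn, start_fn, warn_fn):
  """Run update_fn with the master role stopped, always restarting it."""
  stop_fn()
  try:
    update_fn()
  finally:
    msg = start_fn()
    if msg:
      warn_fn("Could not re-enable the master role, restart manually: %s" %
              msg)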

    
3215
class LUClusterSetParams(LogicalUnit):
3216
  """Change the parameters of the cluster.
3217

3218
  """
3219
  HPATH = "cluster-modify"
3220
  HTYPE = constants.HTYPE_CLUSTER
3221
  REQ_BGL = False
3222

    
3223
  def CheckArguments(self):
3224
    """Check parameters
3225

3226
    """
3227
    if self.op.uid_pool:
3228
      uidpool.CheckUidPool(self.op.uid_pool)
3229

    
3230
    if self.op.add_uids:
3231
      uidpool.CheckUidPool(self.op.add_uids)
3232

    
3233
    if self.op.remove_uids:
3234
      uidpool.CheckUidPool(self.op.remove_uids)
3235

    
3236
  def ExpandNames(self):
3237
    # FIXME: in the future maybe other cluster params won't require checking on
3238
    # all nodes to be modified.
3239
    self.needed_locks = {
3240
      locking.LEVEL_NODE: locking.ALL_SET,
3241
    }
3242
    self.share_locks[locking.LEVEL_NODE] = 1
3243

    
3244
  def BuildHooksEnv(self):
3245
    """Build hooks env.
3246

3247
    """
3248
    return {
3249
      "OP_TARGET": self.cfg.GetClusterName(),
3250
      "NEW_VG_NAME": self.op.vg_name,
3251
      }
3252

    
3253
  def BuildHooksNodes(self):
3254
    """Build hooks nodes.
3255

3256
    """
3257
    mn = self.cfg.GetMasterNode()
3258
    return ([mn], [mn])
3259

    
3260
  def CheckPrereq(self):
3261
    """Check prerequisites.
3262

3263
    This checks whether the given params don't conflict and
3264
    if the given volume group is valid.
3265

3266
    """
3267
    if self.op.vg_name is not None and not self.op.vg_name:
3268
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3269
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3270
                                   " instances exist", errors.ECODE_INVAL)
3271

    
3272
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3273
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3274
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3275
                                   " drbd-based instances exist",
3276
                                   errors.ECODE_INVAL)
3277

    
3278
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3279

    
3280
    # if vg_name is not None, check the given volume group on all nodes
3281
    if self.op.vg_name:
3282
      vglist = self.rpc.call_vg_list(node_list)
3283
      for node in node_list:
3284
        msg = vglist[node].fail_msg
3285
        if msg:
3286
          # ignoring down node
3287
          self.LogWarning("Error while gathering data on node %s"
3288
                          " (ignoring node): %s", node, msg)
3289
          continue
3290
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3291
                                              self.op.vg_name,
3292
                                              constants.MIN_VG_SIZE)
3293
        if vgstatus:
3294
          raise errors.OpPrereqError("Error on node '%s': %s" %
3295
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3296

    
3297
    if self.op.drbd_helper:
3298
      # checks given drbd helper on all nodes
3299
      helpers = self.rpc.call_drbd_helper(node_list)
3300
      for node in node_list:
3301
        ninfo = self.cfg.GetNodeInfo(node)
3302
        if ninfo.offline:
3303
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3304
          continue
3305
        msg = helpers[node].fail_msg
3306
        if msg:
3307
          raise errors.OpPrereqError("Error checking drbd helper on node"
3308
                                     " '%s': %s" % (node, msg),
3309
                                     errors.ECODE_ENVIRON)
3310
        node_helper = helpers[node].payload
3311
        if node_helper != self.op.drbd_helper:
3312
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3313
                                     (node, node_helper), errors.ECODE_ENVIRON)
3314

    
3315
    self.cluster = cluster = self.cfg.GetClusterInfo()
3316
    # validate params changes
3317
    if self.op.beparams:
3318
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3319
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3320

    
3321
    if self.op.ndparams:
3322
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3323
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3324

    
3325
      # TODO: we need a more general way to handle resetting
3326
      # cluster-level parameters to default values
3327
      if self.new_ndparams["oob_program"] == "":
3328
        self.new_ndparams["oob_program"] = \
3329
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3330

    
3331
    if self.op.nicparams:
3332
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3333
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3334
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3335
      nic_errors = []
3336

    
3337
      # check all instances for consistency
3338
      for instance in self.cfg.GetAllInstancesInfo().values():
3339
        for nic_idx, nic in enumerate(instance.nics):
3340
          params_copy = copy.deepcopy(nic.nicparams)
3341
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3342

    
3343
          # check parameter syntax
3344
          try:
3345
            objects.NIC.CheckParameterSyntax(params_filled)
3346
          except errors.ConfigurationError, err:
3347
            nic_errors.append("Instance %s, nic/%d: %s" %
3348
                              (instance.name, nic_idx, err))
3349

    
3350
          # if we're moving instances to routed, check that they have an ip
3351
          target_mode = params_filled[constants.NIC_MODE]
3352
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3353
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3354
                              " address" % (instance.name, nic_idx))
3355
      if nic_errors:
3356
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3357
                                   "\n".join(nic_errors))
3358

    
3359
    # hypervisor list/parameters
3360
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3361
    if self.op.hvparams:
3362
      for hv_name, hv_dict in self.op.hvparams.items():
3363
        if hv_name not in self.new_hvparams:
3364
          self.new_hvparams[hv_name] = hv_dict
3365
        else:
3366
          self.new_hvparams[hv_name].update(hv_dict)
3367

    
3368
    # os hypervisor parameters
3369
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3370
    if self.op.os_hvp:
3371
      for os_name, hvs in self.op.os_hvp.items():
3372
        if os_name not in self.new_os_hvp:
3373
          self.new_os_hvp[os_name] = hvs
3374
        else:
3375
          for hv_name, hv_dict in hvs.items():
3376
            if hv_name not in self.new_os_hvp[os_name]:
3377
              self.new_os_hvp[os_name][hv_name] = hv_dict
3378
            else:
3379
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3380

    
3381
    # os parameters
3382
    self.new_osp = objects.FillDict(cluster.osparams, {})
3383
    if self.op.osparams:
3384
      for os_name, osp in self.op.osparams.items():
3385
        if os_name not in self.new_osp:
3386
          self.new_osp[os_name] = {}
3387

    
3388
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3389
                                                  use_none=True)
3390

    
3391
        if not self.new_osp[os_name]:
3392
          # we removed all parameters
3393
          del self.new_osp[os_name]
3394
        else:
3395
          # check the parameter validity (remote check)
3396
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3397
                         os_name, self.new_osp[os_name])
3398

    
3399
    # changes to the hypervisor list
3400
    if self.op.enabled_hypervisors is not None:
3401
      self.hv_list = self.op.enabled_hypervisors
3402
      for hv in self.hv_list:
3403
        # if the hypervisor doesn't already exist in the cluster
3404
        # hvparams, we initialize it to empty, and then (in both
3405
        # cases) we make sure to fill the defaults, as we might not
3406
        # have a complete defaults list if the hypervisor wasn't
3407
        # enabled before
3408
        if hv not in new_hvp:
3409
          new_hvp[hv] = {}
3410
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3411
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3412
    else:
3413
      self.hv_list = cluster.enabled_hypervisors
3414

    
3415
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3416
      # either the enabled list has changed, or the parameters have, validate
3417
      for hv_name, hv_params in self.new_hvparams.items():
3418
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3419
            (self.op.enabled_hypervisors and
3420
             hv_name in self.op.enabled_hypervisors)):
3421
          # either this is a new hypervisor, or its parameters have changed
3422
          hv_class = hypervisor.GetHypervisor(hv_name)
3423
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3424
          hv_class.CheckParameterSyntax(hv_params)
3425
          _CheckHVParams(self, node_list, hv_name, hv_params)
3426

    
3427
    if self.op.os_hvp:
3428
      # no need to check any newly-enabled hypervisors, since the
3429
      # defaults have already been checked in the above code-block
3430
      for os_name, os_hvp in self.new_os_hvp.items():
3431
        for hv_name, hv_params in os_hvp.items():
3432
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3433
          # we need to fill in the new os_hvp on top of the actual hv_p
3434
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3435
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3436
          hv_class = hypervisor.GetHypervisor(hv_name)
3437
          hv_class.CheckParameterSyntax(new_osp)
3438
          _CheckHVParams(self, node_list, hv_name, new_osp)
3439

    
3440
    if self.op.default_iallocator:
3441
      alloc_script = utils.FindFile(self.op.default_iallocator,
3442
                                    constants.IALLOCATOR_SEARCH_PATH,
3443
                                    os.path.isfile)
3444
      if alloc_script is None:
3445
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3446
                                   " specified" % self.op.default_iallocator,
3447
                                   errors.ECODE_INVAL)
3448

    
3449
  def Exec(self, feedback_fn):
3450
    """Change the parameters of the cluster.
3451

3452
    """
3453
    if self.op.vg_name is not None:
3454
      new_volume = self.op.vg_name
3455
      if not new_volume:
3456
        new_volume = None
3457
      if new_volume != self.cfg.GetVGName():
3458
        self.cfg.SetVGName(new_volume)
3459
      else:
3460
        feedback_fn("Cluster LVM configuration already in desired"
3461
                    " state, not changing")
3462
    if self.op.drbd_helper is not None:
3463
      new_helper = self.op.drbd_helper
3464
      if not new_helper:
3465
        new_helper = None
3466
      if new_helper != self.cfg.GetDRBDHelper():
3467
        self.cfg.SetDRBDHelper(new_helper)
3468
      else:
3469
        feedback_fn("Cluster DRBD helper already in desired state,"
3470
                    " not changing")
3471
    if self.op.hvparams:
3472
      self.cluster.hvparams = self.new_hvparams
3473
    if self.op.os_hvp:
3474
      self.cluster.os_hvp = self.new_os_hvp
3475
    if self.op.enabled_hypervisors is not None:
3476
      self.cluster.hvparams = self.new_hvparams
3477
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3478
    if self.op.beparams:
3479
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3480
    if self.op.nicparams:
3481
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3482
    if self.op.osparams:
3483
      self.cluster.osparams = self.new_osp
3484
    if self.op.ndparams:
3485
      self.cluster.ndparams = self.new_ndparams
3486

    
3487
    if self.op.candidate_pool_size is not None:
3488
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3489
      # we need to update the pool size here, otherwise the save will fail
3490
      _AdjustCandidatePool(self, [])
3491

    
3492
    if self.op.maintain_node_health is not None:
3493
      self.cluster.maintain_node_health = self.op.maintain_node_health
3494

    
3495
    if self.op.prealloc_wipe_disks is not None:
3496
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3497

    
3498
    if self.op.add_uids is not None:
3499
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3500

    
3501
    if self.op.remove_uids is not None:
3502
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3503

    
3504
    if self.op.uid_pool is not None:
3505
      self.cluster.uid_pool = self.op.uid_pool
3506

    
3507
    if self.op.default_iallocator is not None:
3508
      self.cluster.default_iallocator = self.op.default_iallocator
3509

    
3510
    if self.op.reserved_lvs is not None:
3511
      self.cluster.reserved_lvs = self.op.reserved_lvs
3512

    
3513
    def helper_os(aname, mods, desc):
3514
      desc += " OS list"
3515
      lst = getattr(self.cluster, aname)
3516
      for key, val in mods:
3517
        if key == constants.DDM_ADD:
3518
          if val in lst:
3519
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3520
          else:
3521
            lst.append(val)
3522
        elif key == constants.DDM_REMOVE:
3523
          if val in lst:
3524
            lst.remove(val)
3525
          else:
3526
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3527
        else:
3528
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3529

    
3530
    if self.op.hidden_os:
3531
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3532

    
3533
    if self.op.blacklisted_os:
3534
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3535

    
3536
    if self.op.master_netdev:
3537
      master = self.cfg.GetMasterNode()
3538
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3539
                  self.cluster.master_netdev)
3540
      result = self.rpc.call_node_stop_master(master, False)
3541
      result.Raise("Could not disable the master ip")
3542
      feedback_fn("Changing master_netdev from %s to %s" %
3543
                  (self.cluster.master_netdev, self.op.master_netdev))
3544
      self.cluster.master_netdev = self.op.master_netdev
3545

    
3546
    self.cfg.Update(self.cluster, feedback_fn)
3547

    
3548
    if self.op.master_netdev:
3549
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3550
                  self.op.master_netdev)
3551
      result = self.rpc.call_node_start_master(master, False, False)
3552
      if result.fail_msg:
3553
        self.LogWarning("Could not re-enable the master ip on"
3554
                        " the master, please restart manually: %s",
3555
                        result.fail_msg)
3556

    
3557
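# Standalone sketch of the helper_os logic in LUClusterSetParams.Exec above:
# a list of (action, value) modifications is applied to an OS name list,
# warning (instead of failing) when an addition already exists or a removal
# is absent. The literal "add"/"remove" strings are hypothetical stand-ins
# for constants.DDM_ADD/constants.DDM_REMOVE.
def _example_apply_os_list_mods(lst, mods, feedback_fn):
  """Apply add/remove modifications to an OS name list in place."""
  for key, val in mods:
    if key == "add":
      if val in lst:
        feedback_fn("OS %s already present, ignoring" % val)
      else:
        lst.append(val)
    elif key == "remove":
      if val in lst:
        lst.remove(val)
      else:
        feedback_fn("OS %s not found, ignoring" % val)
    else:
      raise ValueError("Invalid modification %r" % key)
  return lst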

    
3558
def _UploadHelper(lu, nodes, fname):
3559
  """Helper for uploading a file and showing warnings.
3560

3561
  """
3562
  if os.path.exists(fname):
3563
    result = lu.rpc.call_upload_file(nodes, fname)
3564
    for to_node, to_result in result.items():
3565
      msg = to_result.fail_msg
3566
      if msg:
3567
        msg = ("Copy of file %s to node %s failed: %s" %
3568
               (fname, to_node, msg))
3569
        lu.proc.LogWarning(msg)
3570

    
3571

    
3572
def _ComputeAncillaryFiles(cluster, redist):
3573
  """Compute files external to Ganeti which need to be consistent.
3574

3575
  @type redist: boolean
3576
  @param redist: Whether to include files which need to be redistributed
3577

3578
  """
3579
  # Compute files for all nodes
3580
  files_all = set([
3581
    constants.SSH_KNOWN_HOSTS_FILE,
3582
    constants.CONFD_HMAC_KEY,
3583
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3584
    ])
3585

    
3586
  if not redist:
3587
    files_all.update(constants.ALL_CERT_FILES)
3588
    files_all.update(ssconf.SimpleStore().GetFileList())
3589

    
3590
  if cluster.modify_etc_hosts:
3591
    files_all.add(constants.ETC_HOSTS)
3592

    
3593
  # Files which must either exist on all nodes or on none
3594
  files_all_opt = set([
3595
    constants.RAPI_USERS_FILE,
3596
    ])
3597

    
3598
  # Files which should only be on master candidates
3599
  files_mc = set()
3600
  if not redist:
3601
    files_mc.add(constants.CLUSTER_CONF_FILE)
3602

    
3603
  # Files which should only be on VM-capable nodes
3604
  files_vm = set(filename
3605
    for hv_name in cluster.enabled_hypervisors
3606
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3607

    
3608
  # Filenames must be unique
3609
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3610
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3611
         "Found file listed in more than one file list"
3612

    
3613
  return (files_all, files_all_opt, files_mc, files_vm)
3614

    
3615
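# Standalone sketch of the uniqueness check at the end of
# _ComputeAncillaryFiles above: a filename may appear in at most one of the
# file lists, which is verified by comparing the size of the union with the
# sum of the individual sizes. The _example_* name is hypothetical.
def _example_lists_are_disjoint(*file_sets):
  """Return True if no filename appears in more than one of the given sets."""
  union = set()
  for fset in file_sets:
    union.update(fset)
  return len(union) == sum(map(len, file_sets))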

    
3616
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3617
  """Distribute additional files which are part of the cluster configuration.
3618

3619
  ConfigWriter takes care of distributing the config and ssconf files, but
3620
  there are more files which should be distributed to all nodes. This function
3621
  makes sure those are copied.
3622

3623
  @param lu: calling logical unit
3624
  @param additional_nodes: list of nodes not in the config to distribute to
3625
  @type additional_vm: boolean
3626
  @param additional_vm: whether the additional nodes are vm-capable or not
3627

3628
  """
3629
  # Gather target nodes
3630
  cluster = lu.cfg.GetClusterInfo()
3631
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3632

    
3633
  online_nodes = lu.cfg.GetOnlineNodeList()
3634
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3635

    
3636
  if additional_nodes is not None:
3637
    online_nodes.extend(additional_nodes)
3638
    if additional_vm:
3639
      vm_nodes.extend(additional_nodes)
3640

    
3641
  # Never distribute to master node
3642
  for nodelist in [online_nodes, vm_nodes]:
3643
    if master_info.name in nodelist:
3644
      nodelist.remove(master_info.name)
3645

    
3646
  # Gather file lists
3647
  (files_all, files_all_opt, files_mc, files_vm) = \
3648
    _ComputeAncillaryFiles(cluster, True)
3649

    
3650
  # Never re-distribute configuration file from here
3651
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3652
              constants.CLUSTER_CONF_FILE in files_vm)
3653
  assert not files_mc, "Master candidates not handled in this function"
3654

    
3655
  filemap = [
3656
    (online_nodes, files_all),
3657
    (online_nodes, files_all_opt),
3658
    (vm_nodes, files_vm),
3659
    ]
3660

    
3661
  # Upload the files
3662
  for (node_list, files) in filemap:
3663
    for fname in files:
3664
      _UploadHelper(lu, node_list, fname)
3665

    
3666
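# Standalone sketch of the distribution plan built in
# _RedistributeAncillaryFiles above: (node list, file set) pairs are expanded
# into one upload per (node list, file) combination, with the master node
# always excluded. The returned tuples merely mirror the _UploadHelper calls
# and are not a Ganeti API.
def _example_build_upload_plan(master_name, filemap):
  """Expand [(node_list, files)] into a flat list of (nodes, fname) uploads."""
  plan = []
  for (node_list, files) in filemap:
    nodes = [name for name in node_list if name != master_name]
    for fname in sorted(files):
      plan.append((tuple(nodes), fname))
  return plan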

    
3667
class LUClusterRedistConf(NoHooksLU):
3668
  """Force the redistribution of cluster configuration.
3669

3670
  This is a very simple LU.
3671

3672
  """
3673
  REQ_BGL = False
3674

    
3675
  def ExpandNames(self):
3676
    self.needed_locks = {
3677
      locking.LEVEL_NODE: locking.ALL_SET,
3678
    }
3679
    self.share_locks[locking.LEVEL_NODE] = 1
3680

    
3681
  def Exec(self, feedback_fn):
3682
    """Redistribute the configuration.
3683

3684
    """
3685
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3686
    _RedistributeAncillaryFiles(self)
3687

    
3688

    
3689
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3690
  """Sleep and poll for an instance's disk to sync.
3691

3692
  """
3693
  if not instance.disks or disks is not None and not disks:
3694
    return True
3695

    
3696
  disks = _ExpandCheckDisks(instance, disks)
3697

    
3698
  if not oneshot:
3699
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3700

    
3701
  node = instance.primary_node
3702

    
3703
  for dev in disks:
3704
    lu.cfg.SetDiskID(dev, node)
3705

    
3706
  # TODO: Convert to utils.Retry
3707

    
3708
  retries = 0
3709
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3710
  while True:
3711
    max_time = 0
3712
    done = True
3713
    cumul_degraded = False
3714
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3715
    msg = rstats.fail_msg
3716
    if msg:
3717
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3718
      retries += 1
3719
      if retries >= 10:
3720
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3721
                                 " aborting." % node)
3722
      time.sleep(6)
3723
      continue
3724
    rstats = rstats.payload
3725
    retries = 0
3726
    for i, mstat in enumerate(rstats):
3727
      if mstat is None:
3728
        lu.LogWarning("Can't compute data for node %s/%s",
3729
                           node, disks[i].iv_name)
3730
        continue
3731

    
3732
      cumul_degraded = (cumul_degraded or
3733
                        (mstat.is_degraded and mstat.sync_percent is None))
3734
      if mstat.sync_percent is not None:
3735
        done = False
3736
        if mstat.estimated_time is not None:
3737
          rem_time = ("%s remaining (estimated)" %
3738
                      utils.FormatSeconds(mstat.estimated_time))
3739
          max_time = mstat.estimated_time
3740
        else:
3741
          rem_time = "no time estimate"
3742
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3743
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3744

    
3745
    # if we're done but degraded, let's do a few small retries, to
3746
    # make sure we see a stable and not transient situation; therefore
3747
    # we force restart of the loop
3748
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3749
      logging.info("Degraded disks found, %d retries left", degr_retries)
3750
      degr_retries -= 1
3751
      time.sleep(1)
3752
      continue
3753

    
3754
    if done or oneshot:
3755
      break
3756

    
3757
    time.sleep(min(60, max_time))
3758

    
3759
  if done:
3760
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3761
  return not cumul_degraded
3762

    
3763
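# A condensed, standalone version of the polling loop in _WaitForSync above:
# a status callback is polled, a few extra retries are kept once the devices
# look done-but-degraded so that transient states are not reported as
# failures, and the loop sleeps between rounds. status_fn is a hypothetical
# callable returning (done, degraded, estimated_seconds_remaining).
def _example_wait_for_sync(status_fn, sleep_fn, degr_retries=10):
  """Poll status_fn until the devices are in sync; return final health."""
  while True:
    (done, degraded, wait) = status_fn()
    if done and degraded and degr_retries > 0:
      # done but degraded: re-check a few times before giving a verdict
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(min(60, wait))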

    
3764
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3765
  """Check that mirrors are not degraded.
3766

3767
  The ldisk parameter, if True, will change the test from the
3768
  is_degraded attribute (which represents overall non-ok status for
3769
  the device(s)) to the ldisk (representing the local storage status).
3770

3771
  """
3772
  lu.cfg.SetDiskID(dev, node)
3773

    
3774
  result = True
3775

    
3776
  if on_primary or dev.AssembleOnSecondary():
3777
    rstats = lu.rpc.call_blockdev_find(node, dev)
3778
    msg = rstats.fail_msg
3779
    if msg:
3780
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3781
      result = False
3782
    elif not rstats.payload:
3783
      lu.LogWarning("Can't find disk on node %s", node)
3784
      result = False
3785
    else:
3786
      if ldisk:
3787
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3788
      else:
3789
        result = result and not rstats.payload.is_degraded
3790

    
3791
  if dev.children:
3792
    for child in dev.children:
3793
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3794

    
3795
  return result
3796

    
3797
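# Standalone sketch of the recursion in _CheckDiskConsistency above: a device
# is consistent only if its own status is acceptable and all of its children
# are. The toy dict-based devices (keys "degraded" and "children") are
# hypothetical stand-ins for objects.Disk and the RPC status payload.
def _example_tree_is_consistent(dev):
  """Recursively check a device and its children for consistency."""
  result = not dev.get("degraded", False)
  for child in dev.get("children", []):
    result = result and _example_tree_is_consistent(child)
  return result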

    
3798
class LUOobCommand(NoHooksLU):
3799
  """Logical unit for OOB handling.
3800

3801
  """
3802
  REQ_BGL = False
3803
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3804

    
3805
  def ExpandNames(self):
3806
    """Gather locks we need.
3807

3808
    """
3809
    if self.op.node_names:
3810
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3811
      lock_names = self.op.node_names
3812
    else:
3813
      lock_names = locking.ALL_SET
3814

    
3815
    self.needed_locks = {
3816
      locking.LEVEL_NODE: lock_names,
3817
      }
3818

    
3819
  def CheckPrereq(self):
3820
    """Check prerequisites.
3821

3822
    This checks:
3823
     - the node exists in the configuration
3824
     - OOB is supported
3825

3826
    Any errors are signaled by raising errors.OpPrereqError.
3827

3828
    """
3829
    self.nodes = []
3830
    self.master_node = self.cfg.GetMasterNode()
3831

    
3832
    assert self.op.power_delay >= 0.0
3833

    
3834
    if self.op.node_names:
3835
      if (self.op.command in self._SKIP_MASTER and
3836
          self.master_node in self.op.node_names):
3837
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3838
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3839

    
3840
        if master_oob_handler:
3841
          additional_text = ("run '%s %s %s' if you want to operate on the"
3842
                             " master regardless") % (master_oob_handler,
3843
                                                      self.op.command,
3844
                                                      self.master_node)
3845
        else:
3846
          additional_text = "it does not support out-of-band operations"
3847

    
3848
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3849
                                    " allowed for %s; %s") %
3850
                                   (self.master_node, self.op.command,
3851
                                    additional_text), errors.ECODE_INVAL)
3852
    else:
3853
      self.op.node_names = self.cfg.GetNodeList()
3854
      if self.op.command in self._SKIP_MASTER:
3855
        self.op.node_names.remove(self.master_node)
3856

    
3857
    if self.op.command in self._SKIP_MASTER:
3858
      assert self.master_node not in self.op.node_names
3859

    
3860
    for node_name in self.op.node_names:
3861
      node = self.cfg.GetNodeInfo(node_name)
3862

    
3863
      if node is None:
3864
        raise errors.OpPrereqError("Node %s not found" % node_name,
3865
                                   errors.ECODE_NOENT)
3866
      else:
3867
        self.nodes.append(node)
3868

    
3869
      if (not self.op.ignore_status and
3870
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3871
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3872
                                    " not marked offline") % node_name,
3873
                                   errors.ECODE_STATE)
3874

    
3875
  def Exec(self, feedback_fn):
3876
    """Execute OOB and return result if we expect any.
3877

3878
    """
3879
    master_node = self.master_node
3880
    ret = []
3881

    
3882
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3883
                                              key=lambda node: node.name)):
3884
      node_entry = [(constants.RS_NORMAL, node.name)]
3885
      ret.append(node_entry)
3886

    
3887
      oob_program = _SupportsOob(self.cfg, node)
3888

    
3889
      if not oob_program:
3890
        node_entry.append((constants.RS_UNAVAIL, None))
3891
        continue
3892

    
3893
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3894
                   self.op.command, oob_program, node.name)
3895
      result = self.rpc.call_run_oob(master_node, oob_program,
3896
                                     self.op.command, node.name,
3897
                                     self.op.timeout)
3898

    
3899
      if result.fail_msg:
3900
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3901
                        node.name, result.fail_msg)
3902
        node_entry.append((constants.RS_NODATA, None))
3903
      else:
3904
        try:
3905
          self._CheckPayload(result)
3906
        except errors.OpExecError, err:
3907
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3908
                          node.name, err)
3909
          node_entry.append((constants.RS_NODATA, None))
3910
        else:
3911
          if self.op.command == constants.OOB_HEALTH:
3912
            # For health we should log important events
3913
            for item, status in result.payload:
3914
              if status in [constants.OOB_STATUS_WARNING,
3915
                            constants.OOB_STATUS_CRITICAL]:
3916
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3917
                                item, node.name, status)
3918

    
3919
          if self.op.command == constants.OOB_POWER_ON:
3920
            node.powered = True
3921
          elif self.op.command == constants.OOB_POWER_OFF:
3922
            node.powered = False
3923
          elif self.op.command == constants.OOB_POWER_STATUS:
3924
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3925
            if powered != node.powered:
3926
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3927
                               " match actual power state (%s)"), node.powered,
3928
                              node.name, powered)
3929

    
3930
          # For configuration changing commands we should update the node
3931
          if self.op.command in (constants.OOB_POWER_ON,
3932
                                 constants.OOB_POWER_OFF):
3933
            self.cfg.Update(node, feedback_fn)
3934

    
3935
          node_entry.append((constants.RS_NORMAL, result.payload))
3936

    
3937
          if (self.op.command == constants.OOB_POWER_ON and
3938
              idx < len(self.nodes) - 1):
3939
            time.sleep(self.op.power_delay)
3940

    
3941
    return ret
3942

    
3943
  def _CheckPayload(self, result):
3944
    """Checks if the payload is valid.
3945

3946
    @param result: RPC result
3947
    @raises errors.OpExecError: If payload is not valid
3948

3949
    """
3950
    errs = []
3951
    if self.op.command == constants.OOB_HEALTH:
3952
      if not isinstance(result.payload, list):
3953
        errs.append("command 'health' is expected to return a list but got %s" %
3954
                    type(result.payload))
3955
      else:
3956
        for item, status in result.payload:
3957
          if status not in constants.OOB_STATUSES:
3958
            errs.append("health item '%s' has invalid status '%s'" %
3959
                        (item, status))
3960

    
3961
    if self.op.command == constants.OOB_POWER_STATUS:
3962
      if not isinstance(result.payload, dict):
3963
        errs.append("power-status is expected to return a dict but got %s" %
3964
                    type(result.payload))
3965

    
3966
    if self.op.command in [
3967
        constants.OOB_POWER_ON,
3968
        constants.OOB_POWER_OFF,
3969
        constants.OOB_POWER_CYCLE,
3970
        ]:
3971
      if result.payload is not None:
3972
        errs.append("%s is expected to not return payload but got '%s'" %
3973
                    (self.op.command, result.payload))
3974

    
3975
    if errs:
3976
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3977
                               utils.CommaJoin(errs))
3978
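# Standalone sketch of the per-command payload validation done in
# LUOobCommand._CheckPayload above: "health" must return a list of
# (item, status) pairs, "power-status" a dict, and the power on/off/cycle
# commands no payload at all. The literal command strings and the
# allowed_statuses argument are hypothetical stand-ins for the
# constants.OOB_* values.
def _example_check_oob_payload(command, payload, allowed_statuses):
  """Return a list of error strings for an out-of-band command result."""
  errs = []
  if command == "health":
    if not isinstance(payload, list):
      errs.append("health is expected to return a list, got %s" %
                  type(payload))
    else:
      for (item, status) in payload:
        if status not in allowed_statuses:
          errs.append("health item %r has invalid status %r" % (item, status))
  elif command == "power-status":
    if not isinstance(payload, dict):
      errs.append("power-status is expected to return a dict, got %s" %
                  type(payload))
  elif command in ("power-on", "power-off", "power-cycle"):
    if payload is not None:
      errs.append("%s is expected to return no payload, got %r" %
                  (command, payload))
  return errs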

    
3979
class _OsQuery(_QueryBase):
3980
  FIELDS = query.OS_FIELDS
3981

    
3982
  def ExpandNames(self, lu):
3983
    # Lock all nodes in shared mode
3984
    # Temporary removal of locks, should be reverted later
3985
    # TODO: reintroduce locks when they are lighter-weight
3986
    lu.needed_locks = {}
3987
    #self.share_locks[locking.LEVEL_NODE] = 1
3988
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3989

    
3990
    # The following variables interact with _QueryBase._GetNames
3991
    if self.names:
3992
      self.wanted = self.names
3993
    else:
3994
      self.wanted = locking.ALL_SET
3995

    
3996
    self.do_locking = self.use_locking
3997

    
3998
  def DeclareLocks(self, lu, level):
3999
    pass
4000

    
4001
  @staticmethod
4002
  def _DiagnoseByOS(rlist):
4003
    """Remaps a per-node return list into an a per-os per-node dictionary
4004

4005
    @param rlist: a map with node names as keys and OS objects as values
4006

4007
    @rtype: dict
4008
    @return: a dictionary with osnames as keys and as value another
4009
        map, with nodes as keys and tuples of (path, status, diagnose,
4010
        variants, parameters, api_versions) as values, eg::
4011

4012
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4013
                                     (/srv/..., False, "invalid api")],
4014
                           "node2": [(/srv/..., True, "", [], [])]}
4015
          }
4016

4017
    """
4018
    all_os = {}
4019
    # we build here the list of nodes that didn't fail the RPC (at RPC
4020
    # level), so that nodes with a non-responding node daemon don't
4021
    # make all OSes invalid
4022
    good_nodes = [node_name for node_name in rlist
4023
                  if not rlist[node_name].fail_msg]
4024
    for node_name, nr in rlist.items():
4025
      if nr.fail_msg or not nr.payload:
4026
        continue
4027
      for (name, path, status, diagnose, variants,
4028
           params, api_versions) in nr.payload:
4029
        if name not in all_os:
4030
          # build a list of nodes for this os containing empty lists
4031
          # for each node in node_list
4032
          all_os[name] = {}
4033
          for nname in good_nodes:
4034
            all_os[name][nname] = []
4035
        # convert params from [name, help] to (name, help)
4036
        params = [tuple(v) for v in params]
4037
        all_os[name][node_name].append((path, status, diagnose,
4038
                                        variants, params, api_versions))
4039
    return all_os
4040

    
4041
  def _GetQueryData(self, lu):
4042
    """Computes the list of nodes and their attributes.
4043

4044
    """
4045
    # Locking is not used
4046
    assert not (compat.any(lu.glm.is_owned(level)
4047
                           for level in locking.LEVELS
4048
                           if level != locking.LEVEL_CLUSTER) or
4049
                self.do_locking or self.use_locking)
4050

    
4051
    valid_nodes = [node.name
4052
                   for node in lu.cfg.GetAllNodesInfo().values()
4053
                   if not node.offline and node.vm_capable]
4054
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4055
    cluster = lu.cfg.GetClusterInfo()
4056

    
4057
    data = {}
4058

    
4059
    for (os_name, os_data) in pol.items():
4060
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4061
                          hidden=(os_name in cluster.hidden_os),
4062
                          blacklisted=(os_name in cluster.blacklisted_os))
4063

    
4064
      variants = set()
4065
      parameters = set()
4066
      api_versions = set()
4067

    
4068
      for idx, osl in enumerate(os_data.values()):
4069
        info.valid = bool(info.valid and osl and osl[0][1])
4070
        if not info.valid:
4071
          break
4072

    
4073
        (node_variants, node_params, node_api) = osl[0][3:6]
4074
        if idx == 0:
4075
          # First entry
4076
          variants.update(node_variants)
4077
          parameters.update(node_params)
4078
          api_versions.update(node_api)
4079
        else:
4080
          # Filter out inconsistent values
4081
          variants.intersection_update(node_variants)
4082
          parameters.intersection_update(node_params)
4083
          api_versions.intersection_update(node_api)
4084

    
4085
      info.variants = list(variants)
4086
      info.parameters = list(parameters)
4087
      info.api_versions = list(api_versions)
4088

    
4089
      data[os_name] = info
4090

    
4091
    # Prepare data in requested order
4092
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4093
            if name in data]
4094

    
4095
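# Standalone sketch of the remapping done by _OsQuery._DiagnoseByOS above: a
# per-node list of OS entries is turned into a per-OS dict of per-node lists,
# pre-seeded with an empty list for every node so that a missing entry is
# distinguishable from an OS that was simply not reported. The simplified
# (name, status) payload is a hypothetical stand-in for the full 7-tuple
# returned by the RPC, and per_node_payload is assumed to contain only nodes
# that answered.
def _example_remap_by_os(per_node_payload):
  """Remap {node: [(os_name, status)]} into {os_name: {node: [status]}}."""
  all_os = {}
  good_nodes = list(per_node_payload)
  for node_name, entries in per_node_payload.items():
    for (name, status) in entries:
      if name not in all_os:
        all_os[name] = dict((nname, []) for nname in good_nodes)
      all_os[name][node_name].append(status)
  return all_os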

    
4096
class LUOsDiagnose(NoHooksLU):
4097
  """Logical unit for OS diagnose/query.
4098

4099
  """
4100
  REQ_BGL = False
4101

    
4102
  @staticmethod
4103
  def _BuildFilter(fields, names):
4104
    """Builds a filter for querying OSes.
4105

4106
    """
4107
    name_filter = qlang.MakeSimpleFilter("name", names)
4108

    
4109
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4110
    # respective field is not requested
4111
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4112
                     for fname in ["hidden", "blacklisted"]
4113
                     if fname not in fields]
4114
    if "valid" not in fields:
4115
      status_filter.append([qlang.OP_TRUE, "valid"])
4116

    
4117
    if status_filter:
4118
      status_filter.insert(0, qlang.OP_AND)
4119
    else:
4120
      status_filter = None
4121

    
4122
    if name_filter and status_filter:
4123
      return [qlang.OP_AND, name_filter, status_filter]
4124
    elif name_filter:
4125
      return name_filter
4126
    else:
4127
      return status_filter
4128

    
4129
  def CheckArguments(self):
4130
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4131
                       self.op.output_fields, False)
4132

    
4133
  def ExpandNames(self):
4134
    self.oq.ExpandNames(self)
4135

    
4136
  def Exec(self, feedback_fn):
4137
    return self.oq.OldStyleQuery(self)
4138

    
4139
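# Standalone sketch of the filter combination performed by
# LUOsDiagnose._BuildFilter above: a name filter and a status filter (hiding
# hidden/blacklisted/invalid OSes whose field was not explicitly requested)
# are joined with a logical AND when both are present. The "and"/"not"/"true"
# operator strings are hypothetical placeholders for the qlang.OP_* constants,
# and name_filter is taken as a ready-made argument.
def _example_build_os_filter(fields, name_filter):
  """Combine a name filter with the legacy status filter."""
  status_filter = [["not", ["true", fname]]
                   for fname in ("hidden", "blacklisted")
                   if fname not in fields]
  if "valid" not in fields:
    status_filter.append(["true", "valid"])
  if status_filter:
    status_filter.insert(0, "and")
  else:
    status_filter = None
  if name_filter and status_filter:
    return ["and", name_filter, status_filter]
  return name_filter or status_filter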

    
4140
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


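# Illustrative note (not part of the original code): the work done by
# _NodeQuery above is driven by self.requested_data.  For a query that only
# asks for static fields (e.g. "name" or "pip"), query.NQ_LIVE is not
# requested, so do_locking stays False, no node locks are acquired and no
# node_info RPC is issued; live_data is then simply None in the returned
# query.NodeQueryData.
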
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the requested node(s).

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


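# Illustrative note (not part of the original code): the "instance" column
# computed in LUNodeQueryvols.Exec above looks volumes up by a
# (node, "vg/lv") tuple; a hypothetical volume "xenvg/disk0" on node
# "node1.example.com" would be resolved via
#
#   vol2inst.get(("node1.example.com", "xenvg" + "/" + "disk0"), "-")
#
# and falls back to "-" when the logical volume does not belong to any
# instance known to the configuration.
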
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


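# Illustrative note (not part of the original code): LUNodeQueryStorage.Exec
# above always includes SF_NAME in the backend query (it is needed for
# sorting) and strips SF_NODE/SF_TYPE, which only the LU itself knows.  For
# a hypothetical request of [SF_NODE, SF_NAME, SF_FREE], the storage_list
# RPC is issued with [SF_NAME, SF_FREE] and each output row is rebuilt
# locally as [node, row[field_idx[SF_NAME]], row[field_idx[SF_FREE]]].
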
class _InstanceQuery(_QueryBase):
4482
  FIELDS = query.INSTANCE_FIELDS
4483

    
4484
  def ExpandNames(self, lu):
4485
    lu.needed_locks = {}
4486
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
4487
    lu.share_locks[locking.LEVEL_NODE] = 1
4488

    
4489
    if self.names:
4490
      self.wanted = _GetWantedInstances(lu, self.names)
4491
    else:
4492
      self.wanted = locking.ALL_SET
4493

    
4494
    self.do_locking = (self.use_locking and
4495
                       query.IQ_LIVE in self.requested_data)
4496
    if self.do_locking:
4497
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4498
      lu.needed_locks[locking.LEVEL_NODE] = []
4499
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4500

    
4501
  def DeclareLocks(self, lu, level):
4502
    if level == locking.LEVEL_NODE and self.do_locking:
4503
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
4504

    
4505
  def _GetQueryData(self, lu):
4506
    """Computes the list of instances and their attributes.
4507

4508
    """
4509
    cluster = lu.cfg.GetClusterInfo()
4510
    all_info = lu.cfg.GetAllInstancesInfo()
4511

    
4512
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4513

    
4514
    instance_list = [all_info[name] for name in instance_names]
4515
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4516
                                        for inst in instance_list)))
4517
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4518
    bad_nodes = []
4519
    offline_nodes = []
4520
    wrongnode_inst = set()
4521

    
4522
    # Gather data as requested
4523
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4524
      live_data = {}
4525
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4526
      for name in nodes:
4527
        result = node_data[name]
4528
        if result.offline:
4529
          # offline nodes will be in both lists
4530
          assert result.fail_msg
4531
          offline_nodes.append(name)
4532
        if result.fail_msg:
4533
          bad_nodes.append(name)
4534
        elif result.payload:
4535
          for inst in result.payload:
4536
            if inst in all_info:
4537
              if all_info[inst].primary_node == name:
4538
                live_data.update(result.payload)
4539
              else:
4540
                wrongnode_inst.add(inst)
4541
            else:
4542
              # orphan instance; we don't list it here as we don't
4543
              # handle this case yet in the output of instance listing
4544
              logging.warning("Orphan instance '%s' found on node %s",
4545
                              inst, name)
4546
        # else no instance is alive
4547
    else:
4548
      live_data = {}
4549

    
4550
    if query.IQ_DISKUSAGE in self.requested_data:
4551
      disk_usage = dict((inst.name,
4552
                         _ComputeDiskSize(inst.disk_template,
4553
                                          [{constants.IDISK_SIZE: disk.size}
4554
                                           for disk in inst.disks]))
4555
                        for inst in instance_list)
4556
    else:
4557
      disk_usage = None
4558

    
4559
    if query.IQ_CONSOLE in self.requested_data:
4560
      consinfo = {}
4561
      for inst in instance_list:
4562
        if inst.name in live_data:
4563
          # Instance is running
4564
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4565
        else:
4566
          consinfo[inst.name] = None
4567
      assert set(consinfo.keys()) == set(instance_names)
4568
    else:
4569
      consinfo = None
4570

    
4571
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4572
                                   disk_usage, offline_nodes, bad_nodes,
4573
                                   live_data, wrongnode_inst, consinfo)
4574

    
4575

    
4576
class LUQuery(NoHooksLU):
4577
  """Query for resources/items of a certain kind.
4578

4579
  """
4580
  # pylint: disable-msg=W0142
4581
  REQ_BGL = False
4582

    
4583
  def CheckArguments(self):
4584
    qcls = _GetQueryImplementation(self.op.what)
4585

    
4586
    self.impl = qcls(self.op.filter, self.op.fields, False)
4587

    
4588
  def ExpandNames(self):
4589
    self.impl.ExpandNames(self)
4590

    
4591
  def DeclareLocks(self, level):
4592
    self.impl.DeclareLocks(self, level)
4593

    
4594
  def Exec(self, feedback_fn):
4595
    return self.impl.NewStyleQuery(self)
4596

    
4597

    
4598
class LUQueryFields(NoHooksLU):
4599
  """Query for resources/items of a certain kind.
4600

4601
  """
4602
  # pylint: disable-msg=W0142
4603
  REQ_BGL = False
4604

    
4605
  def CheckArguments(self):
4606
    self.qcls = _GetQueryImplementation(self.op.what)
4607

    
4608
  def ExpandNames(self):
4609
    self.needed_locks = {}
4610

    
4611
  def Exec(self, feedback_fn):
4612
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4613

    
4614

    
4615
class LUNodeModifyStorage(NoHooksLU):
4616
  """Logical unit for modifying a storage volume on a node.
4617

4618
  """
4619
  REQ_BGL = False
4620

    
4621
  def CheckArguments(self):
4622
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4623

    
4624
    storage_type = self.op.storage_type
4625

    
4626
    try:
4627
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4628
    except KeyError:
4629
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4630
                                 " modified" % storage_type,
4631
                                 errors.ECODE_INVAL)
4632

    
4633
    diff = set(self.op.changes.keys()) - modifiable
4634
    if diff:
4635
      raise errors.OpPrereqError("The following fields can not be modified for"
4636
                                 " storage units of type '%s': %r" %
4637
                                 (storage_type, list(diff)),
4638
                                 errors.ECODE_INVAL)
4639

    
4640
  def ExpandNames(self):
4641
    self.needed_locks = {
4642
      locking.LEVEL_NODE: self.op.node_name,
4643
      }
4644

    
4645
  def Exec(self, feedback_fn):
4646
    """Computes the list of nodes and their attributes.
4647

4648
    """
4649
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4650
    result = self.rpc.call_storage_modify(self.op.node_name,
4651
                                          self.op.storage_type, st_args,
4652
                                          self.op.name, self.op.changes)
4653
    result.Raise("Failed to modify storage unit '%s' on %s" %
4654
                 (self.op.name, self.op.node_name))
4655

    
4656

    
4657
class LUNodeAdd(LogicalUnit):
4658
  """Logical unit for adding node to the cluster.
4659

4660
  """
4661
  HPATH = "node-add"
4662
  HTYPE = constants.HTYPE_NODE
4663
  _NFLAGS = ["master_capable", "vm_capable"]
4664

    
4665
  def CheckArguments(self):
4666
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4667
    # validate/normalize the node name
4668
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4669
                                         family=self.primary_ip_family)
4670
    self.op.node_name = self.hostname.name
4671

    
4672
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4673
      raise errors.OpPrereqError("Cannot readd the master node",
4674
                                 errors.ECODE_STATE)
4675

    
4676
    if self.op.readd and self.op.group:
4677
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4678
                                 " being readded", errors.ECODE_INVAL)
4679

    
4680
  def BuildHooksEnv(self):
4681
    """Build hooks env.
4682

4683
    This will run on all nodes before, and on all nodes + the new node after.
4684

4685
    """
4686
    return {
4687
      "OP_TARGET": self.op.node_name,
4688
      "NODE_NAME": self.op.node_name,
4689
      "NODE_PIP": self.op.primary_ip,
4690
      "NODE_SIP": self.op.secondary_ip,
4691
      "MASTER_CAPABLE": str(self.op.master_capable),
4692
      "VM_CAPABLE": str(self.op.vm_capable),
4693
      }
4694

    
4695
  def BuildHooksNodes(self):
4696
    """Build hooks nodes.
4697

4698
    """
4699
    # Exclude added node
4700
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4701
    post_nodes = pre_nodes + [self.op.node_name, ]
4702

    
4703
    return (pre_nodes, post_nodes)
4704

    
4705
  def CheckPrereq(self):
4706
    """Check prerequisites.
4707

4708
    This checks:
4709
     - the new node is not already in the config
4710
     - it is resolvable
4711
     - its parameters (single/dual homed) matches the cluster
4712

4713
    Any errors are signaled by raising errors.OpPrereqError.
4714

4715
    """
4716
    cfg = self.cfg
4717
    hostname = self.hostname
4718
    node = hostname.name
4719
    primary_ip = self.op.primary_ip = hostname.ip
4720
    if self.op.secondary_ip is None:
4721
      if self.primary_ip_family == netutils.IP6Address.family:
4722
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4723
                                   " IPv4 address must be given as secondary",
4724
                                   errors.ECODE_INVAL)
4725
      self.op.secondary_ip = primary_ip
4726

    
4727
    secondary_ip = self.op.secondary_ip
4728
    if not netutils.IP4Address.IsValid(secondary_ip):
4729
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4730
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4731

    
4732
    node_list = cfg.GetNodeList()
4733
    if not self.op.readd and node in node_list:
4734
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4735
                                 node, errors.ECODE_EXISTS)
4736
    elif self.op.readd and node not in node_list:
4737
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4738
                                 errors.ECODE_NOENT)
4739

    
4740
    self.changed_primary_ip = False
4741

    
4742
    for existing_node_name in node_list:
4743
      existing_node = cfg.GetNodeInfo(existing_node_name)
4744

    
4745
      if self.op.readd and node == existing_node_name:
4746
        if existing_node.secondary_ip != secondary_ip:
4747
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4748
                                     " address configuration as before",
4749
                                     errors.ECODE_INVAL)
4750
        if existing_node.primary_ip != primary_ip:
4751
          self.changed_primary_ip = True
4752

    
4753
        continue
4754

    
4755
      if (existing_node.primary_ip == primary_ip or
4756
          existing_node.secondary_ip == primary_ip or
4757
          existing_node.primary_ip == secondary_ip or
4758
          existing_node.secondary_ip == secondary_ip):
4759
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4760
                                   " existing node %s" % existing_node.name,
4761
                                   errors.ECODE_NOTUNIQUE)
4762

    
4763
    # After this 'if' block, None is no longer a valid value for the
4764
    # _capable op attributes
4765
    if self.op.readd:
4766
      old_node = self.cfg.GetNodeInfo(node)
4767
      assert old_node is not None, "Can't retrieve locked node %s" % node
4768
      for attr in self._NFLAGS:
4769
        if getattr(self.op, attr) is None:
4770
          setattr(self.op, attr, getattr(old_node, attr))
4771
    else:
4772
      for attr in self._NFLAGS:
4773
        if getattr(self.op, attr) is None:
4774
          setattr(self.op, attr, True)
4775

    
4776
    if self.op.readd and not self.op.vm_capable:
4777
      pri, sec = cfg.GetNodeInstances(node)
4778
      if pri or sec:
4779
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4780
                                   " flag set to false, but it already holds"
4781
                                   " instances" % node,
4782
                                   errors.ECODE_STATE)
4783

    
4784
    # check that the type of the node (single versus dual homed) is the
4785
    # same as for the master
4786
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4787
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4788
    newbie_singlehomed = secondary_ip == primary_ip
4789
    if master_singlehomed != newbie_singlehomed:
4790
      if master_singlehomed:
4791
        raise errors.OpPrereqError("The master has no secondary ip but the"
4792
                                   " new node has one",
4793
                                   errors.ECODE_INVAL)
4794
      else:
4795
        raise errors.OpPrereqError("The master has a secondary ip but the"
4796
                                   " new node doesn't have one",
4797
                                   errors.ECODE_INVAL)
4798

    
4799
    # checks reachability
4800
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4801
      raise errors.OpPrereqError("Node not reachable by ping",
4802
                                 errors.ECODE_ENVIRON)
4803

    
4804
    if not newbie_singlehomed:
4805
      # check reachability from my secondary ip to newbie's secondary ip
4806
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4807
                           source=myself.secondary_ip):
4808
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4809
                                   " based ping to node daemon port",
4810
                                   errors.ECODE_ENVIRON)
4811

    
4812
    if self.op.readd:
4813
      exceptions = [node]
4814
    else:
4815
      exceptions = []
4816

    
4817
    if self.op.master_capable:
4818
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4819
    else:
4820
      self.master_candidate = False
4821

    
4822
    if self.op.readd:
4823
      self.new_node = old_node
4824
    else:
4825
      node_group = cfg.LookupNodeGroup(self.op.group)
4826
      self.new_node = objects.Node(name=node,
4827
                                   primary_ip=primary_ip,
4828
                                   secondary_ip=secondary_ip,
4829
                                   master_candidate=self.master_candidate,
4830
                                   offline=False, drained=False,
4831
                                   group=node_group)
4832

    
4833
    if self.op.ndparams:
4834
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4835

    
4836
  def Exec(self, feedback_fn):
4837
    """Adds the new node to the cluster.
4838

4839
    """
4840
    new_node = self.new_node
4841
    node = new_node.name
4842

    
4843
    # We are adding a new node, so we assume it is powered
4844
    new_node.powered = True
4845

    
4846
    # for re-adds, reset the offline/drained/master-candidate flags;
4847
    # we need to reset here, otherwise offline would prevent RPC calls
4848
    # later in the procedure; this also means that if the re-add
4849
    # fails, we are left with a non-offlined, broken node
4850
    if self.op.readd:
4851
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4852
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4853
      # if we demote the node, we do cleanup later in the procedure
4854
      new_node.master_candidate = self.master_candidate
4855
      if self.changed_primary_ip:
4856
        new_node.primary_ip = self.op.primary_ip
4857

    
4858
    # copy the master/vm_capable flags
4859
    for attr in self._NFLAGS:
4860
      setattr(new_node, attr, getattr(self.op, attr))
4861

    
4862
    # notify the user about any possible mc promotion
4863
    if new_node.master_candidate:
4864
      self.LogInfo("Node will be a master candidate")
4865

    
4866
    if self.op.ndparams:
4867
      new_node.ndparams = self.op.ndparams
4868
    else:
4869
      new_node.ndparams = {}
4870

    
4871
    # check connectivity
4872
    result = self.rpc.call_version([node])[node]
4873
    result.Raise("Can't get version information from node %s" % node)
4874
    if constants.PROTOCOL_VERSION == result.payload:
4875
      logging.info("Communication to node %s fine, sw version %s match",
4876
                   node, result.payload)
4877
    else:
4878
      raise errors.OpExecError("Version mismatch master version %s,"
4879
                               " node version %s" %
4880
                               (constants.PROTOCOL_VERSION, result.payload))
4881

    
4882
    # Add node to our /etc/hosts, and add key to known_hosts
4883
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4884
      master_node = self.cfg.GetMasterNode()
4885
      result = self.rpc.call_etc_hosts_modify(master_node,
4886
                                              constants.ETC_HOSTS_ADD,
4887
                                              self.hostname.name,
4888
                                              self.hostname.ip)
4889
      result.Raise("Can't update hosts file with new host data")
4890

    
4891
    if new_node.secondary_ip != new_node.primary_ip:
4892
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4893
                               False)
4894

    
4895
    node_verify_list = [self.cfg.GetMasterNode()]
4896
    node_verify_param = {
4897
      constants.NV_NODELIST: [node],
4898
      # TODO: do a node-net-test as well?
4899
    }
4900

    
4901
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4902
                                       self.cfg.GetClusterName())
4903
    for verifier in node_verify_list:
4904
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4905
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4906
      if nl_payload:
4907
        for failed in nl_payload:
4908
          feedback_fn("ssh/hostname verification failed"
4909
                      " (checking from %s): %s" %
4910
                      (verifier, nl_payload[failed]))
4911
        raise errors.OpExecError("ssh/hostname verification failed")
4912

    
4913
    if self.op.readd:
4914
      _RedistributeAncillaryFiles(self)
4915
      self.context.ReaddNode(new_node)
4916
      # make sure we redistribute the config
4917
      self.cfg.Update(new_node, feedback_fn)
4918
      # and make sure the new node will not have old files around
4919
      if not new_node.master_candidate:
4920
        result = self.rpc.call_node_demote_from_mc(new_node.name)
4921
        msg = result.fail_msg
4922
        if msg:
4923
          self.LogWarning("Node failed to demote itself from master"
4924
                          " candidate status: %s" % msg)
4925
    else:
4926
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
4927
                                  additional_vm=self.op.vm_capable)
4928
      self.context.AddNode(new_node, self.proc.GetECId())
4929

    
4930

    
4931
class LUNodeSetParams(LogicalUnit):
4932
  """Modifies the parameters of a node.
4933

4934
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4935
      to the node role (as _ROLE_*)
4936
  @cvar _R2F: a dictionary from node role to tuples of flags
4937
  @cvar _FLAGS: a list of attribute names corresponding to the flags
4938

4939
  """
4940
  HPATH = "node-modify"
4941
  HTYPE = constants.HTYPE_NODE
4942
  REQ_BGL = False
4943
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4944
  _F2R = {
4945
    (True, False, False): _ROLE_CANDIDATE,
4946
    (False, True, False): _ROLE_DRAINED,
4947
    (False, False, True): _ROLE_OFFLINE,
4948
    (False, False, False): _ROLE_REGULAR,
4949
    }
4950
  _R2F = dict((v, k) for k, v in _F2R.items())
4951
  _FLAGS = ["master_candidate", "drained", "offline"]
4952

    
4953
  def CheckArguments(self):
4954
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4955
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4956
                self.op.master_capable, self.op.vm_capable,
4957
                self.op.secondary_ip, self.op.ndparams]
4958
    if all_mods.count(None) == len(all_mods):
4959
      raise errors.OpPrereqError("Please pass at least one modification",
4960
                                 errors.ECODE_INVAL)
4961
    if all_mods.count(True) > 1:
4962
      raise errors.OpPrereqError("Can't set the node into more than one"
4963
                                 " state at the same time",
4964
                                 errors.ECODE_INVAL)
4965

    
4966
    # Boolean value that tells us whether we might be demoting from MC
4967
    self.might_demote = (self.op.master_candidate == False or
4968
                         self.op.offline == True or
4969
                         self.op.drained == True or
4970
                         self.op.master_capable == False)
4971

    
4972
    if self.op.secondary_ip:
4973
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4974
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4975
                                   " address" % self.op.secondary_ip,
4976
                                   errors.ECODE_INVAL)
4977

    
4978
    self.lock_all = self.op.auto_promote and self.might_demote
4979
    self.lock_instances = self.op.secondary_ip is not None
4980

    
4981
  def ExpandNames(self):
4982
    if self.lock_all:
4983
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4984
    else:
4985
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4986

    
4987
    if self.lock_instances:
4988
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4989

    
4990
  def DeclareLocks(self, level):
4991
    # If we have locked all instances, before waiting to lock nodes, release
4992
    # all the ones living on nodes unrelated to the current operation.
4993
    if level == locking.LEVEL_NODE and self.lock_instances:
4994
      self.affected_instances = []
4995
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4996
        instances_keep = []
4997

    
4998
        # Build list of instances to release
4999
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
5000
          instance = self.context.cfg.GetInstanceInfo(instance_name)
5001
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5002
              self.op.node_name in instance.all_nodes):
5003
            instances_keep.append(instance_name)
5004
            self.affected_instances.append(instance)
5005

    
5006
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5007

    
5008
        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
5009
                set(instances_keep))
5010

    
5011
  def BuildHooksEnv(self):
5012
    """Build hooks env.
5013

5014
    This runs on the master node.
5015

5016
    """
5017
    return {
5018
      "OP_TARGET": self.op.node_name,
5019
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5020
      "OFFLINE": str(self.op.offline),
5021
      "DRAINED": str(self.op.drained),
5022
      "MASTER_CAPABLE": str(self.op.master_capable),
5023
      "VM_CAPABLE": str(self.op.vm_capable),
5024
      }
5025

    
5026
  def BuildHooksNodes(self):
5027
    """Build hooks nodes.
5028

5029
    """
5030
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5031
    return (nl, nl)
5032

    
5033
  def CheckPrereq(self):
5034
    """Check prerequisites.
5035

5036
    This only checks the instance list against the existing names.
5037

5038
    """
5039
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5040

    
5041
    if (self.op.master_candidate is not None or
5042
        self.op.drained is not None or
5043
        self.op.offline is not None):
5044
      # we can't change the master's node flags
5045
      if self.op.node_name == self.cfg.GetMasterNode():
5046
        raise errors.OpPrereqError("The master role can be changed"
5047
                                   " only via master-failover",
5048
                                   errors.ECODE_INVAL)
5049

    
5050
    if self.op.master_candidate and not node.master_capable:
5051
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5052
                                 " it a master candidate" % node.name,
5053
                                 errors.ECODE_STATE)
5054

    
5055
    if self.op.vm_capable == False:
5056
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5057
      if ipri or isec:
5058
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5059
                                   " the vm_capable flag" % node.name,
5060
                                   errors.ECODE_STATE)
5061

    
5062
    if node.master_candidate and self.might_demote and not self.lock_all:
5063
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5064
      # check if after removing the current node, we're missing master
5065
      # candidates
5066
      (mc_remaining, mc_should, _) = \
5067
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5068
      if mc_remaining < mc_should:
5069
        raise errors.OpPrereqError("Not enough master candidates, please"
5070
                                   " pass auto promote option to allow"
5071
                                   " promotion", errors.ECODE_STATE)
5072

    
5073
    self.old_flags = old_flags = (node.master_candidate,
5074
                                  node.drained, node.offline)
5075
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5076
    self.old_role = old_role = self._F2R[old_flags]
5077

    
5078
    # Check for ineffective changes
5079
    for attr in self._FLAGS:
5080
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5081
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5082
        setattr(self.op, attr, None)
5083

    
5084
    # Past this point, any flag change to False means a transition
5085
    # away from the respective state, as only real changes are kept
5086

    
5087
    # TODO: We might query the real power state if it supports OOB
5088
    if _SupportsOob(self.cfg, node):
5089
      if self.op.offline is False and not (node.powered or
5090
                                           self.op.powered == True):
5091
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5092
                                    " offline status can be reset") %
5093
                                   self.op.node_name)
5094
    elif self.op.powered is not None:
5095
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5096
                                  " as it does not support out-of-band"
5097
                                  " handling") % self.op.node_name)
5098

    
5099
    # If we're being deofflined/drained, we'll MC ourself if needed
5100
    if (self.op.drained == False or self.op.offline == False or
5101
        (self.op.master_capable and not node.master_capable)):
5102
      if _DecideSelfPromotion(self):
5103
        self.op.master_candidate = True
5104
        self.LogInfo("Auto-promoting node to master candidate")
5105

    
5106
    # If we're no longer master capable, we'll demote ourselves from MC
5107
    if self.op.master_capable == False and node.master_candidate:
5108
      self.LogInfo("Demoting from master candidate")
5109
      self.op.master_candidate = False
5110

    
5111
    # Compute new role
5112
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5113
    if self.op.master_candidate:
5114
      new_role = self._ROLE_CANDIDATE
5115
    elif self.op.drained:
5116
      new_role = self._ROLE_DRAINED
5117
    elif self.op.offline:
5118
      new_role = self._ROLE_OFFLINE
5119
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5120
      # False is still in new flags, which means we're un-setting (the
5121
      # only) True flag
5122
      new_role = self._ROLE_REGULAR
5123
    else: # no new flags, nothing, keep old role
5124
      new_role = old_role
5125

    
5126
    self.new_role = new_role
5127

    
5128
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5129
      # Trying to transition out of offline status
5130
      result = self.rpc.call_version([node.name])[node.name]
5131
      if result.fail_msg:
5132
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5133
                                   " to report its version: %s" %
5134
                                   (node.name, result.fail_msg),
5135
                                   errors.ECODE_STATE)
5136
      else:
5137
        self.LogWarning("Transitioning node from offline to online state"
5138
                        " without using re-add. Please make sure the node"
5139
                        " is healthy!")
5140

    
5141
    if self.op.secondary_ip:
5142
      # Ok even without locking, because this can't be changed by any LU
5143
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5144
      master_singlehomed = master.secondary_ip == master.primary_ip
5145
      if master_singlehomed and self.op.secondary_ip:
5146
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5147
                                   " homed cluster", errors.ECODE_INVAL)
5148

    
5149
      if node.offline:
5150
        if self.affected_instances:
5151
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5152
                                     " node has instances (%s) configured"
5153
                                     " to use it" % self.affected_instances)
5154
      else:
5155
        # On online nodes, check that no instances are running, and that
5156
        # the node has the new ip and we can reach it.
5157
        for instance in self.affected_instances:
5158
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5159

    
5160
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5161
        if master.name != node.name:
5162
          # check reachability from master secondary ip to new secondary ip
5163
          if not netutils.TcpPing(self.op.secondary_ip,
5164
                                  constants.DEFAULT_NODED_PORT,
5165
                                  source=master.secondary_ip):
5166
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5167
                                       " based ping to node daemon port",
5168
                                       errors.ECODE_ENVIRON)
5169

    
5170
    if self.op.ndparams:
5171
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5172
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5173
      self.new_ndparams = new_ndparams
5174

    
5175
  def Exec(self, feedback_fn):
5176
    """Modifies a node.
5177

5178
    """
5179
    node = self.node
5180
    old_role = self.old_role
5181
    new_role = self.new_role
5182

    
5183
    result = []
5184

    
5185
    if self.op.ndparams:
5186
      node.ndparams = self.new_ndparams
5187

    
5188
    if self.op.powered is not None:
5189
      node.powered = self.op.powered
5190

    
5191
    for attr in ["master_capable", "vm_capable"]:
5192
      val = getattr(self.op, attr)
5193
      if val is not None:
5194
        setattr(node, attr, val)
5195
        result.append((attr, str(val)))
5196

    
5197
    if new_role != old_role:
5198
      # Tell the node to demote itself, if no longer MC and not offline
5199
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5200
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5201
        if msg:
5202
          self.LogWarning("Node failed to demote itself: %s", msg)
5203

    
5204
      new_flags = self._R2F[new_role]
5205
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5206
        if of != nf:
5207
          result.append((desc, str(nf)))
5208
      (node.master_candidate, node.drained, node.offline) = new_flags
5209

    
5210
      # we locked all nodes, we adjust the CP before updating this node
5211
      if self.lock_all:
5212
        _AdjustCandidatePool(self, [node.name])
5213

    
5214
    if self.op.secondary_ip:
5215
      node.secondary_ip = self.op.secondary_ip
5216
      result.append(("secondary_ip", self.op.secondary_ip))
5217

    
5218
    # this will trigger configuration file update, if needed
5219
    self.cfg.Update(node, feedback_fn)
5220

    
5221
    # this will trigger job queue propagation or cleanup if the mc
5222
    # flag changed
5223
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5224
      self.context.ReaddNode(node)
5225

    
5226
    return result
5227

    
5228

    
5229
class LUNodePowercycle(NoHooksLU):
5230
  """Powercycles a node.
5231

5232
  """
5233
  REQ_BGL = False
5234

    
5235
  def CheckArguments(self):
5236
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5237
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5238
      raise errors.OpPrereqError("The node is the master and the force"
5239
                                 " parameter was not set",
5240
                                 errors.ECODE_INVAL)
5241

    
5242
  def ExpandNames(self):
5243
    """Locking for PowercycleNode.
5244

5245
    This is a last-resort option and shouldn't block on other
5246
    jobs. Therefore, we grab no locks.
5247

5248
    """
5249
    self.needed_locks = {}
5250

    
5251
  def Exec(self, feedback_fn):
5252
    """Reboots a node.
5253

5254
    """
5255
    result = self.rpc.call_node_powercycle(self.op.node_name,
5256
                                           self.cfg.GetHypervisorType())
5257
    result.Raise("Failed to schedule the reboot")
5258
    return result.payload
5259

    
5260

    
5261
class LUClusterQuery(NoHooksLU):
5262
  """Query cluster configuration.
5263

5264
  """
5265
  REQ_BGL = False
5266

    
5267
  def ExpandNames(self):
5268
    self.needed_locks = {}
5269

    
5270
  def Exec(self, feedback_fn):
5271
    """Return cluster config.
5272

5273
    """
5274
    cluster = self.cfg.GetClusterInfo()
5275
    os_hvp = {}
5276

    
5277
    # Filter just for enabled hypervisors
5278
    for os_name, hv_dict in cluster.os_hvp.items():
5279
      os_hvp[os_name] = {}
5280
      for hv_name, hv_params in hv_dict.items():
5281
        if hv_name in cluster.enabled_hypervisors:
5282
          os_hvp[os_name][hv_name] = hv_params
5283

    
5284
    # Convert ip_family to ip_version
5285
    primary_ip_version = constants.IP4_VERSION
5286
    if cluster.primary_ip_family == netutils.IP6Address.family:
5287
      primary_ip_version = constants.IP6_VERSION
5288

    
5289
    result = {
5290
      "software_version": constants.RELEASE_VERSION,
5291
      "protocol_version": constants.PROTOCOL_VERSION,
5292
      "config_version": constants.CONFIG_VERSION,
5293
      "os_api_version": max(constants.OS_API_VERSIONS),
5294
      "export_version": constants.EXPORT_VERSION,
5295
      "architecture": (platform.architecture()[0], platform.machine()),
5296
      "name": cluster.cluster_name,
5297
      "master": cluster.master_node,
5298
      "default_hypervisor": cluster.enabled_hypervisors[0],
5299
      "enabled_hypervisors": cluster.enabled_hypervisors,
5300
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5301
                        for hypervisor_name in cluster.enabled_hypervisors]),
5302
      "os_hvp": os_hvp,
5303
      "beparams": cluster.beparams,
5304
      "osparams": cluster.osparams,
5305
      "nicparams": cluster.nicparams,
5306
      "ndparams": cluster.ndparams,
5307
      "candidate_pool_size": cluster.candidate_pool_size,
5308
      "master_netdev": cluster.master_netdev,
5309
      "volume_group_name": cluster.volume_group_name,
5310
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5311
      "file_storage_dir": cluster.file_storage_dir,
5312
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5313
      "maintain_node_health": cluster.maintain_node_health,
5314
      "ctime": cluster.ctime,
5315
      "mtime": cluster.mtime,
5316
      "uuid": cluster.uuid,
5317
      "tags": list(cluster.GetTags()),
5318
      "uid_pool": cluster.uid_pool,
5319
      "default_iallocator": cluster.default_iallocator,
5320
      "reserved_lvs": cluster.reserved_lvs,
5321
      "primary_ip_version": primary_ip_version,
5322
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5323
      "hidden_os": cluster.hidden_os,
5324
      "blacklisted_os": cluster.blacklisted_os,
5325
      }
5326

    
5327
    return result
5328

    
5329

    
5330
class LUClusterConfigQuery(NoHooksLU):
5331
  """Return configuration values.
5332

5333
  """
5334
  REQ_BGL = False
5335
  _FIELDS_DYNAMIC = utils.FieldSet()
5336
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5337
                                  "watcher_pause", "volume_group_name")
5338

    
5339
  def CheckArguments(self):
5340
    _CheckOutputFields(static=self._FIELDS_STATIC,
5341
                       dynamic=self._FIELDS_DYNAMIC,
5342
                       selected=self.op.output_fields)
5343

    
5344
  def ExpandNames(self):
5345
    self.needed_locks = {}
5346

    
5347
  def Exec(self, feedback_fn):
5348
    """Dump a representation of the cluster config to the standard output.
5349

5350
    """
5351
    values = []
5352
    for field in self.op.output_fields:
5353
      if field == "cluster_name":
5354
        entry = self.cfg.GetClusterName()
5355
      elif field == "master_node":
5356
        entry = self.cfg.GetMasterNode()
5357
      elif field == "drain_flag":
5358
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5359
      elif field == "watcher_pause":
5360
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5361
      elif field == "volume_group_name":
5362
        entry = self.cfg.GetVGName()
5363
      else:
5364
        raise errors.ParameterError(field)
5365
      values.append(entry)
5366
    return values
5367

    
5368

    
5369
class LUInstanceActivateDisks(NoHooksLU):
5370
  """Bring up an instance's disks.
5371

5372
  """
5373
  REQ_BGL = False
5374

    
5375
  def ExpandNames(self):
5376
    self._ExpandAndLockInstance()
5377
    self.needed_locks[locking.LEVEL_NODE] = []
5378
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5379

    
5380
  def DeclareLocks(self, level):
5381
    if level == locking.LEVEL_NODE:
5382
      self._LockInstancesNodes()
5383

    
5384
  def CheckPrereq(self):
5385
    """Check prerequisites.
5386

5387
    This checks that the instance is in the cluster.
5388

5389
    """
5390
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5391
    assert self.instance is not None, \
5392
      "Cannot retrieve locked instance %s" % self.op.instance_name
5393
    _CheckNodeOnline(self, self.instance.primary_node)
5394

    
5395
  def Exec(self, feedback_fn):
5396
    """Activate the disks.
5397

5398
    """
5399
    disks_ok, disks_info = \
5400
              _AssembleInstanceDisks(self, self.instance,
5401
                                     ignore_size=self.op.ignore_size)
5402
    if not disks_ok:
5403
      raise errors.OpExecError("Cannot activate block devices")
5404

    
5405
    return disks_info
5406

    
5407

    
5408
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5409
                           ignore_size=False):
5410
  """Prepare the block devices for an instance.
5411

5412
  This sets up the block devices on all nodes.
5413

5414
  @type lu: L{LogicalUnit}
5415
  @param lu: the logical unit on whose behalf we execute
5416
  @type instance: L{objects.Instance}
5417
  @param instance: the instance for whose disks we assemble
5418
  @type disks: list of L{objects.Disk} or None
5419
  @param disks: which disks to assemble (or all, if None)
5420
  @type ignore_secondaries: boolean
5421
  @param ignore_secondaries: if true, errors on secondary nodes
5422
      won't result in an error return from the function
5423
  @type ignore_size: boolean
5424
  @param ignore_size: if true, the current known size of the disk
5425
      will not be used during the disk activation, useful for cases
5426
      when the size is wrong
5427
  @return: False if the operation failed, otherwise a list of
5428
      (host, instance_visible_name, node_visible_name)
5429
      with the mapping from node devices to instance devices
5430

5431
  """
5432
  device_info = []
5433
  disks_ok = True
5434
  iname = instance.name
5435
  disks = _ExpandCheckDisks(instance, disks)
5436

    
5437
  # With the two passes mechanism we try to reduce the window of
5438
  # opportunity for the race condition of switching DRBD to primary
5439
  # before handshaking occurred, but we do not eliminate it
5440

    
5441
  # The proper fix would be to wait (with some limits) until the
5442
  # connection has been made and drbd transitions from WFConnection
5443
  # into any other network-connected state (Connected, SyncTarget,
5444
  # SyncSource, etc.)
5445

    
5446
  # 1st pass, assemble on all nodes in secondary mode
5447
  for idx, inst_disk in enumerate(disks):
5448
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5449
      if ignore_size:
5450
        node_disk = node_disk.Copy()
5451
        node_disk.UnsetSize()
5452
      lu.cfg.SetDiskID(node_disk, node)
5453
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5454
      msg = result.fail_msg
5455
      if msg:
5456
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5457
                           " (is_primary=False, pass=1): %s",
5458
                           inst_disk.iv_name, node, msg)
5459
        if not ignore_secondaries:
5460
          disks_ok = False
5461

    
5462
  # FIXME: race condition on drbd migration to primary
5463

    
5464
  # 2nd pass, do only the primary node
5465
  for idx, inst_disk in enumerate(disks):
5466
    dev_path = None
5467

    
5468
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5469
      if node != instance.primary_node:
5470
        continue
5471
      if ignore_size:
5472
        node_disk = node_disk.Copy()
5473
        node_disk.UnsetSize()
5474
      lu.cfg.SetDiskID(node_disk, node)
5475
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5476
      msg = result.fail_msg
5477
      if msg:
5478
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5479
                           " (is_primary=True, pass=2): %s",
5480
                           inst_disk.iv_name, node, msg)
5481
        disks_ok = False
5482
      else:
5483
        dev_path = result.payload
5484

    
5485
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5486

    
5487
  # leave the disks configured for the primary node
5488
  # this is a workaround that would be fixed better by
5489
  # improving the logical/physical id handling
5490
  for disk in disks:
5491
    lu.cfg.SetDiskID(disk, instance.primary_node)
5492

    
5493
  return disks_ok, device_info
5494

    
5495

    
5496
def _StartInstanceDisks(lu, instance, force):
5497
  """Start the disks of an instance.
5498

5499
  """
5500
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5501
                                           ignore_secondaries=force)
5502
  if not disks_ok:
5503
    _ShutdownInstanceDisks(lu, instance)
5504
    if force is not None and not force:
5505
      lu.proc.LogWarning("", hint="If the message above refers to a"
5506
                         " secondary node,"
5507
                         " you can retry the operation using '--force'.")
5508
    raise errors.OpExecError("Disk consistency error")
5509

    
5510

    
5511
class LUInstanceDeactivateDisks(NoHooksLU):
5512
  """Shutdown an instance's disks.
5513

5514
  """
5515
  REQ_BGL = False
5516

    
5517
  def ExpandNames(self):
5518
    self._ExpandAndLockInstance()
5519
    self.needed_locks[locking.LEVEL_NODE] = []
5520
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5521

    
5522
  def DeclareLocks(self, level):
5523
    if level == locking.LEVEL_NODE:
5524
      self._LockInstancesNodes()
5525

    
5526
  def CheckPrereq(self):
5527
    """Check prerequisites.
5528

5529
    This checks that the instance is in the cluster.
5530

5531
    """
5532
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5533
    assert self.instance is not None, \
5534
      "Cannot retrieve locked instance %s" % self.op.instance_name
5535

    
5536
  def Exec(self, feedback_fn):
5537
    """Deactivate the disks
5538

5539
    """
5540
    instance = self.instance
5541
    if self.op.force:
5542
      _ShutdownInstanceDisks(self, instance)
5543
    else:
5544
      _SafeShutdownInstanceDisks(self, instance)
5545

    
5546

    
5547
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5548
  """Shutdown block devices of an instance.
5549

5550
  This function checks if an instance is running, before calling
5551
  _ShutdownInstanceDisks.
5552

5553
  """
5554
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5555
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5556

    
5557

    
5558
def _ExpandCheckDisks(instance, disks):
5559
  """Return the instance disks selected by the disks list
5560

5561
  @type disks: list of L{objects.Disk} or None
5562
  @param disks: selected disks
5563
  @rtype: list of L{objects.Disk}
5564
  @return: selected instance disks to act on
5565

5566
  """
5567
  if disks is None:
5568
    return instance.disks
5569
  else:
5570
    if not set(disks).issubset(instance.disks):
5571
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5572
                                   " target instance")
5573
    return disks
5574

    
5575

    
5576
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node make the result unsuccessful unless
  ignore_primary is true; errors on other nodes are ignored only when
  the node is marked offline.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result

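# Illustrative call (sketch, not in the original source): cleanup paths where
# the primary node may already be dead can ignore primary-side failures with
#   _ShutdownInstanceDisks(self, instance, ignore_primary=True)
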
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)

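# Illustrative call (sketch, not in the original source): requiring 2048 MiB
# before starting an instance under KVM would look roughly like
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        2048, constants.HT_KVM)
# OpPrereqError with ECODE_NORES is raised when the node reports less than
# the requested amount in "memory_free".
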
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)

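# Illustrative req_sizes value (made-up VG name): {"xenvg": 3 * 1024} asks for
# 3 GiB of free space in volume group "xenvg" on every node in nodenames; each
# (vg, size) pair is verified via _CheckNodesFreeDiskOnVG below.
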
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
5700
  """Starts an instance.
5701

5702
  """
5703
  HPATH = "instance-start"
5704
  HTYPE = constants.HTYPE_INSTANCE
5705
  REQ_BGL = False
5706

    
5707
  def CheckArguments(self):
5708
    # extra beparams
5709
    if self.op.beparams:
5710
      # fill the beparams dict
5711
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5712

    
5713
  def ExpandNames(self):
5714
    self._ExpandAndLockInstance()
5715

    
5716
  def BuildHooksEnv(self):
5717
    """Build hooks env.
5718

5719
    This runs on master, primary and secondary nodes of the instance.
5720

5721
    """
5722
    env = {
5723
      "FORCE": self.op.force,
5724
      }
5725

    
5726
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5727

    
5728
    return env
5729

    
5730
  def BuildHooksNodes(self):
5731
    """Build hooks nodes.
5732

5733
    """
5734
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5735
    return (nl, nl)
5736

    
5737
  def CheckPrereq(self):
5738
    """Check prerequisites.
5739

5740
    This checks that the instance is in the cluster.
5741

5742
    """
5743
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5744
    assert self.instance is not None, \
5745
      "Cannot retrieve locked instance %s" % self.op.instance_name
5746

    
5747
    # extra hvparams
5748
    if self.op.hvparams:
5749
      # check hypervisor parameter syntax (locally)
5750
      cluster = self.cfg.GetClusterInfo()
5751
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5752
      filled_hvp = cluster.FillHV(instance)
5753
      filled_hvp.update(self.op.hvparams)
5754
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5755
      hv_type.CheckParameterSyntax(filled_hvp)
5756
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5757

    
5758
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5759

    
5760
    if self.primary_offline and self.op.ignore_offline_nodes:
5761
      self.proc.LogWarning("Ignoring offline primary node")
5762

    
5763
      if self.op.hvparams or self.op.beparams:
5764
        self.proc.LogWarning("Overridden parameters are ignored")
5765
    else:
5766
      _CheckNodeOnline(self, instance.primary_node)
5767

    
5768
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5769

    
5770
      # check bridges existence
5771
      _CheckInstanceBridgesExist(self, instance)
5772

    
5773
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5774
                                                instance.name,
5775
                                                instance.hypervisor)
5776
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5777
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5778
      if not remote_info.payload: # not running already
5779
        _CheckNodeFreeMemory(self, instance.primary_node,
5780
                             "starting instance %s" % instance.name,
5781
                             bep[constants.BE_MEMORY], instance.hypervisor)
5782

    
5783
  def Exec(self, feedback_fn):
5784
    """Start the instance.
5785

5786
    """
5787
    instance = self.instance
5788
    force = self.op.force
5789

    
5790
    if not self.op.no_remember:
5791
      self.cfg.MarkInstanceUp(instance.name)
5792

    
5793
    if self.primary_offline:
5794
      assert self.op.ignore_offline_nodes
5795
      self.proc.LogInfo("Primary node offline, marked instance as started")
5796
    else:
5797
      node_current = instance.primary_node
5798

    
5799
      _StartInstanceDisks(self, instance, force)
5800

    
5801
      result = self.rpc.call_instance_start(node_current, instance,
5802
                                            self.op.hvparams, self.op.beparams,
5803
                                            self.op.startup_paused)
5804
      msg = result.fail_msg
5805
      if msg:
5806
        _ShutdownInstanceDisks(self, instance)
5807
        raise errors.OpExecError("Could not start instance: %s" % msg)
5808

    
5809

    
5810
class LUInstanceReboot(LogicalUnit):
5811
  """Reboot an instance.
5812

5813
  """
5814
  HPATH = "instance-reboot"
5815
  HTYPE = constants.HTYPE_INSTANCE
5816
  REQ_BGL = False
5817

    
5818
  def ExpandNames(self):
5819
    self._ExpandAndLockInstance()
5820

    
5821
  def BuildHooksEnv(self):
5822
    """Build hooks env.
5823

5824
    This runs on master, primary and secondary nodes of the instance.
5825

5826
    """
5827
    env = {
5828
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5829
      "REBOOT_TYPE": self.op.reboot_type,
5830
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5831
      }
5832

    
5833
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5834

    
5835
    return env
5836

    
5837
  def BuildHooksNodes(self):
5838
    """Build hooks nodes.
5839

5840
    """
5841
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5842
    return (nl, nl)
5843

    
5844
  def CheckPrereq(self):
5845
    """Check prerequisites.
5846

5847
    This checks that the instance is in the cluster.
5848

5849
    """
5850
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5851
    assert self.instance is not None, \
5852
      "Cannot retrieve locked instance %s" % self.op.instance_name
5853

    
5854
    _CheckNodeOnline(self, instance.primary_node)
5855

    
5856
    # check bridges existence
5857
    _CheckInstanceBridgesExist(self, instance)
5858

    
5859
  def Exec(self, feedback_fn):
5860
    """Reboot the instance.
5861

5862
    """
5863
    instance = self.instance
5864
    ignore_secondaries = self.op.ignore_secondaries
5865
    reboot_type = self.op.reboot_type
5866

    
5867
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5868
                                              instance.name,
5869
                                              instance.hypervisor)
5870
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5871
    instance_running = bool(remote_info.payload)
5872

    
5873
    node_current = instance.primary_node
5874

    
5875
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5876
                                            constants.INSTANCE_REBOOT_HARD]:
5877
      for disk in instance.disks:
5878
        self.cfg.SetDiskID(disk, node_current)
5879
      result = self.rpc.call_instance_reboot(node_current, instance,
5880
                                             reboot_type,
5881
                                             self.op.shutdown_timeout)
5882
      result.Raise("Could not reboot instance")
5883
    else:
5884
      if instance_running:
5885
        result = self.rpc.call_instance_shutdown(node_current, instance,
5886
                                                 self.op.shutdown_timeout)
5887
        result.Raise("Could not shutdown instance for full reboot")
5888
        _ShutdownInstanceDisks(self, instance)
5889
      else:
5890
        self.LogInfo("Instance %s was already stopped, starting now",
5891
                     instance.name)
5892
      _StartInstanceDisks(self, instance, ignore_secondaries)
5893
      result = self.rpc.call_instance_start(node_current, instance,
5894
                                            None, None, False)
5895
      msg = result.fail_msg
5896
      if msg:
5897
        _ShutdownInstanceDisks(self, instance)
5898
        raise errors.OpExecError("Could not start instance for"
5899
                                 " full reboot: %s" % msg)
5900

    
5901
    self.cfg.MarkInstanceUp(instance.name)
5902

    
5903

    
5904
class LUInstanceShutdown(LogicalUnit):
5905
  """Shutdown an instance.
5906

5907
  """
5908
  HPATH = "instance-stop"
5909
  HTYPE = constants.HTYPE_INSTANCE
5910
  REQ_BGL = False
5911

    
5912
  def ExpandNames(self):
5913
    self._ExpandAndLockInstance()
5914

    
5915
  def BuildHooksEnv(self):
5916
    """Build hooks env.
5917

5918
    This runs on master, primary and secondary nodes of the instance.
5919

5920
    """
5921
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5922
    env["TIMEOUT"] = self.op.timeout
5923
    return env
5924

    
5925
  def BuildHooksNodes(self):
5926
    """Build hooks nodes.
5927

5928
    """
5929
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5930
    return (nl, nl)
5931

    
5932
  def CheckPrereq(self):
5933
    """Check prerequisites.
5934

5935
    This checks that the instance is in the cluster.
5936

5937
    """
5938
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5939
    assert self.instance is not None, \
5940
      "Cannot retrieve locked instance %s" % self.op.instance_name
5941

    
5942
    self.primary_offline = \
5943
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5944

    
5945
    if self.primary_offline and self.op.ignore_offline_nodes:
5946
      self.proc.LogWarning("Ignoring offline primary node")
5947
    else:
5948
      _CheckNodeOnline(self, self.instance.primary_node)
5949

    
5950
  def Exec(self, feedback_fn):
5951
    """Shutdown the instance.
5952

5953
    """
5954
    instance = self.instance
5955
    node_current = instance.primary_node
5956
    timeout = self.op.timeout
5957

    
5958
    if not self.op.no_remember:
5959
      self.cfg.MarkInstanceDown(instance.name)
5960

    
5961
    if self.primary_offline:
5962
      assert self.op.ignore_offline_nodes
5963
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5964
    else:
5965
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5966
      msg = result.fail_msg
5967
      if msg:
5968
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5969

    
5970
      _ShutdownInstanceDisks(self, instance)
5971

    
5972

    
5973
class LUInstanceReinstall(LogicalUnit):
5974
  """Reinstall an instance.
5975

5976
  """
5977
  HPATH = "instance-reinstall"
5978
  HTYPE = constants.HTYPE_INSTANCE
5979
  REQ_BGL = False
5980

    
5981
  def ExpandNames(self):
5982
    self._ExpandAndLockInstance()
5983

    
5984
  def BuildHooksEnv(self):
5985
    """Build hooks env.
5986

5987
    This runs on master, primary and secondary nodes of the instance.
5988

5989
    """
5990
    return _BuildInstanceHookEnvByObject(self, self.instance)
5991

    
5992
  def BuildHooksNodes(self):
5993
    """Build hooks nodes.
5994

5995
    """
5996
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5997
    return (nl, nl)
5998

    
5999
  def CheckPrereq(self):
6000
    """Check prerequisites.
6001

6002
    This checks that the instance is in the cluster and is not running.
6003

6004
    """
6005
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6006
    assert instance is not None, \
6007
      "Cannot retrieve locked instance %s" % self.op.instance_name
6008
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6009
                     " offline, cannot reinstall")
6010
    for node in instance.secondary_nodes:
6011
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6012
                       " cannot reinstall")
6013

    
6014
    if instance.disk_template == constants.DT_DISKLESS:
6015
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6016
                                 self.op.instance_name,
6017
                                 errors.ECODE_INVAL)
6018
    _CheckInstanceDown(self, instance, "cannot reinstall")
6019

    
6020
    if self.op.os_type is not None:
6021
      # OS verification
6022
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6023
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6024
      instance_os = self.op.os_type
6025
    else:
6026
      instance_os = instance.os
6027

    
6028
    nodelist = list(instance.all_nodes)
6029

    
6030
    if self.op.osparams:
6031
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6032
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6033
      self.os_inst = i_osdict # the new dict (without defaults)
6034
    else:
6035
      self.os_inst = None
6036

    
6037
    self.instance = instance
6038

    
6039
  def Exec(self, feedback_fn):
6040
    """Reinstall the instance.
6041

6042
    """
6043
    inst = self.instance
6044

    
6045
    if self.op.os_type is not None:
6046
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6047
      inst.os = self.op.os_type
6048
      # Write to configuration
6049
      self.cfg.Update(inst, feedback_fn)
6050

    
6051
    _StartInstanceDisks(self, inst, None)
6052
    try:
6053
      feedback_fn("Running the instance OS create scripts...")
6054
      # FIXME: pass debug option from opcode to backend
6055
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6056
                                             self.op.debug_level,
6057
                                             osparams=self.os_inst)
6058
      result.Raise("Could not install OS for instance %s on node %s" %
6059
                   (inst.name, inst.primary_node))
6060
    finally:
6061
      _ShutdownInstanceDisks(self, inst)
6062

    
6063

    
6064
class LUInstanceRecreateDisks(LogicalUnit):
6065
  """Recreate an instance's missing disks.
6066

6067
  """
6068
  HPATH = "instance-recreate-disks"
6069
  HTYPE = constants.HTYPE_INSTANCE
6070
  REQ_BGL = False
6071

    
6072
  def CheckArguments(self):
6073
    # normalise the disk list
6074
    self.op.disks = sorted(frozenset(self.op.disks))
6075

    
6076
  def ExpandNames(self):
6077
    self._ExpandAndLockInstance()
6078
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6079
    if self.op.nodes:
6080
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6081
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6082
    else:
6083
      self.needed_locks[locking.LEVEL_NODE] = []
6084

    
6085
  def DeclareLocks(self, level):
6086
    if level == locking.LEVEL_NODE:
6087
      # if we replace the nodes, we only need to lock the old primary,
6088
      # otherwise we need to lock all nodes for disk re-creation
6089
      primary_only = bool(self.op.nodes)
6090
      self._LockInstancesNodes(primary_only=primary_only)
6091

    
6092
  def BuildHooksEnv(self):
6093
    """Build hooks env.
6094

6095
    This runs on master, primary and secondary nodes of the instance.
6096

6097
    """
6098
    return _BuildInstanceHookEnvByObject(self, self.instance)
6099

    
6100
  def BuildHooksNodes(self):
6101
    """Build hooks nodes.
6102

6103
    """
6104
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6105
    return (nl, nl)
6106

    
6107
  def CheckPrereq(self):
6108
    """Check prerequisites.
6109

6110
    This checks that the instance is in the cluster and is not running.
6111

6112
    """
6113
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6114
    assert instance is not None, \
6115
      "Cannot retrieve locked instance %s" % self.op.instance_name
6116
    if self.op.nodes:
6117
      if len(self.op.nodes) != len(instance.all_nodes):
6118
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6119
                                   " %d replacement nodes were specified" %
6120
                                   (instance.name, len(instance.all_nodes),
6121
                                    len(self.op.nodes)),
6122
                                   errors.ECODE_INVAL)
6123
      assert instance.disk_template != constants.DT_DRBD8 or \
6124
          len(self.op.nodes) == 2
6125
      assert instance.disk_template != constants.DT_PLAIN or \
6126
          len(self.op.nodes) == 1
6127
      primary_node = self.op.nodes[0]
6128
    else:
6129
      primary_node = instance.primary_node
6130
    _CheckNodeOnline(self, primary_node)
6131

    
6132
    if instance.disk_template == constants.DT_DISKLESS:
6133
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6134
                                 self.op.instance_name, errors.ECODE_INVAL)
6135
    # if we replace nodes *and* the old primary is offline, we don't
6136
    # check
6137
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6138
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6139
    if not (self.op.nodes and old_pnode.offline):
6140
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6141

    
6142
    if not self.op.disks:
6143
      self.op.disks = range(len(instance.disks))
6144
    else:
6145
      for idx in self.op.disks:
6146
        if idx >= len(instance.disks):
6147
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6148
                                     errors.ECODE_INVAL)
6149
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6150
      raise errors.OpPrereqError("Can't recreate disks partially and"
6151
                                 " change the nodes at the same time",
6152
                                 errors.ECODE_INVAL)
6153
    self.instance = instance
6154

    
6155
  def Exec(self, feedback_fn):
6156
    """Recreate the disks.
6157

6158
    """
6159
    instance = self.instance
6160

    
6161
    to_skip = []
6162
    mods = [] # keeps track of needed logical_id changes
6163

    
6164
    for idx, disk in enumerate(instance.disks):
6165
      if idx not in self.op.disks: # disk idx has not been passed in
6166
        to_skip.append(idx)
6167
        continue
6168
      # update secondaries for disks, if needed
6169
      if self.op.nodes:
6170
        if disk.dev_type == constants.LD_DRBD8:
6171
          # need to update the nodes and minors
6172
          assert len(self.op.nodes) == 2
6173
          assert len(disk.logical_id) == 6 # otherwise disk internals
6174
                                           # have changed
6175
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6176
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6177
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6178
                    new_minors[0], new_minors[1], old_secret)
6179
          assert len(disk.logical_id) == len(new_id)
6180
          mods.append((idx, new_id))
6181

    
6182
    # now that we have passed all asserts above, we can apply the mods
6183
    # in a single run (to avoid partial changes)
6184
    for idx, new_id in mods:
6185
      instance.disks[idx].logical_id = new_id
6186

    
6187
    # change primary node, if needed
6188
    if self.op.nodes:
6189
      instance.primary_node = self.op.nodes[0]
6190
      self.LogWarning("Changing the instance's nodes, you will have to"
6191
                      " remove any disks left on the older nodes manually")
6192

    
6193
    if self.op.nodes:
6194
      self.cfg.Update(instance, feedback_fn)
6195

    
6196
    _CreateDisks(self, instance, to_skip=to_skip)
6197

    
6198

    
6199
class LUInstanceRename(LogicalUnit):
6200
  """Rename an instance.
6201

6202
  """
6203
  HPATH = "instance-rename"
6204
  HTYPE = constants.HTYPE_INSTANCE
6205

    
6206
  def CheckArguments(self):
6207
    """Check arguments.
6208

6209
    """
6210
    if self.op.ip_check and not self.op.name_check:
6211
      # TODO: make the ip check more flexible and not depend on the name check
6212
      raise errors.OpPrereqError("IP address check requires a name check",
6213
                                 errors.ECODE_INVAL)
6214

    
6215
  def BuildHooksEnv(self):
6216
    """Build hooks env.
6217

6218
    This runs on master, primary and secondary nodes of the instance.
6219

6220
    """
6221
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6222
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6223
    return env
6224

    
6225
  def BuildHooksNodes(self):
6226
    """Build hooks nodes.
6227

6228
    """
6229
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6230
    return (nl, nl)
6231

    
6232
  def CheckPrereq(self):
6233
    """Check prerequisites.
6234

6235
    This checks that the instance is in the cluster and is not running.
6236

6237
    """
6238
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6239
                                                self.op.instance_name)
6240
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6241
    assert instance is not None
6242
    _CheckNodeOnline(self, instance.primary_node)
6243
    _CheckInstanceDown(self, instance, "cannot rename")
6244
    self.instance = instance
6245

    
6246
    new_name = self.op.new_name
6247
    if self.op.name_check:
6248
      hostname = netutils.GetHostname(name=new_name)
6249
      if hostname != new_name:
6250
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6251
                     hostname.name)
6252
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6253
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6254
                                    " same as given hostname '%s'") %
6255
                                    (hostname.name, self.op.new_name),
6256
                                    errors.ECODE_INVAL)
6257
      new_name = self.op.new_name = hostname.name
6258
      if (self.op.ip_check and
6259
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6260
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6261
                                   (hostname.ip, new_name),
6262
                                   errors.ECODE_NOTUNIQUE)
6263

    
6264
    instance_list = self.cfg.GetInstanceList()
6265
    if new_name in instance_list and new_name != instance.name:
6266
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6267
                                 new_name, errors.ECODE_EXISTS)
6268

    
6269
  def Exec(self, feedback_fn):
6270
    """Rename the instance.
6271

6272
    """
6273
    inst = self.instance
6274
    old_name = inst.name
6275

    
6276
    rename_file_storage = False
6277
    if (inst.disk_template in constants.DTS_FILEBASED and
6278
        self.op.new_name != inst.name):
6279
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6280
      rename_file_storage = True
6281

    
6282
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6283
    # Change the instance lock. This is definitely safe while we hold the BGL.
6284
    # Otherwise the new lock would have to be added in acquired mode.
6285
    assert self.REQ_BGL
6286
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6287
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6288

    
6289
    # re-read the instance from the configuration after rename
6290
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6291

    
6292
    if rename_file_storage:
6293
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6294
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6295
                                                     old_file_storage_dir,
6296
                                                     new_file_storage_dir)
6297
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6298
                   " (but the instance has been renamed in Ganeti)" %
6299
                   (inst.primary_node, old_file_storage_dir,
6300
                    new_file_storage_dir))
6301

    
6302
    _StartInstanceDisks(self, inst, None)
6303
    try:
6304
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6305
                                                 old_name, self.op.debug_level)
6306
      msg = result.fail_msg
6307
      if msg:
6308
        msg = ("Could not run OS rename script for instance %s on node %s"
6309
               " (but the instance has been renamed in Ganeti): %s" %
6310
               (inst.name, inst.primary_node, msg))
6311
        self.proc.LogWarning(msg)
6312
    finally:
6313
      _ShutdownInstanceDisks(self, inst)
6314

    
6315
    return inst.name
6316

    
6317

    
6318
class LUInstanceRemove(LogicalUnit):
6319
  """Remove an instance.
6320

6321
  """
6322
  HPATH = "instance-remove"
6323
  HTYPE = constants.HTYPE_INSTANCE
6324
  REQ_BGL = False
6325

    
6326
  def ExpandNames(self):
6327
    self._ExpandAndLockInstance()
6328
    self.needed_locks[locking.LEVEL_NODE] = []
6329
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6330

    
6331
  def DeclareLocks(self, level):
6332
    if level == locking.LEVEL_NODE:
6333
      self._LockInstancesNodes()
6334

    
6335
  def BuildHooksEnv(self):
6336
    """Build hooks env.
6337

6338
    This runs on master, primary and secondary nodes of the instance.
6339

6340
    """
6341
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6342
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6343
    return env
6344

    
6345
  def BuildHooksNodes(self):
6346
    """Build hooks nodes.
6347

6348
    """
6349
    nl = [self.cfg.GetMasterNode()]
6350
    nl_post = list(self.instance.all_nodes) + nl
6351
    return (nl, nl_post)
6352

    
6353
  def CheckPrereq(self):
6354
    """Check prerequisites.
6355

6356
    This checks that the instance is in the cluster.
6357

6358
    """
6359
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6360
    assert self.instance is not None, \
6361
      "Cannot retrieve locked instance %s" % self.op.instance_name
6362

    
6363
  def Exec(self, feedback_fn):
6364
    """Remove the instance.
6365

6366
    """
6367
    instance = self.instance
6368
    logging.info("Shutting down instance %s on node %s",
6369
                 instance.name, instance.primary_node)
6370

    
6371
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6372
                                             self.op.shutdown_timeout)
6373
    msg = result.fail_msg
6374
    if msg:
6375
      if self.op.ignore_failures:
6376
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6377
      else:
6378
        raise errors.OpExecError("Could not shutdown instance %s on"
6379
                                 " node %s: %s" %
6380
                                 (instance.name, instance.primary_node, msg))
6381

    
6382
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6383

    
6384

    
6385
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6386
  """Utility function to remove an instance.
6387

6388
  """
6389
  logging.info("Removing block devices for instance %s", instance.name)
6390

    
6391
  if not _RemoveDisks(lu, instance):
6392
    if not ignore_failures:
6393
      raise errors.OpExecError("Can't remove instance's disks")
6394
    feedback_fn("Warning: can't remove instance's disks")
6395

    
6396
  logging.info("Removing instance %s out of cluster config", instance.name)
6397

    
6398
  lu.cfg.RemoveInstance(instance.name)
6399

    
6400
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6401
    "Instance lock removal conflict"
6402

    
6403
  # Remove lock for the instance
6404
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6405

    
6406

    
6407
class LUInstanceQuery(NoHooksLU):
6408
  """Logical unit for querying instances.
6409

6410
  """
6411
  # pylint: disable-msg=W0142
6412
  REQ_BGL = False
6413

    
6414
  def CheckArguments(self):
6415
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6416
                             self.op.output_fields, self.op.use_locking)
6417

    
6418
  def ExpandNames(self):
6419
    self.iq.ExpandNames(self)
6420

    
6421
  def DeclareLocks(self, level):
6422
    self.iq.DeclareLocks(self, level)
6423

    
6424
  def Exec(self, feedback_fn):
6425
    return self.iq.OldStyleQuery(self)
6426

    
6427

    
6428
class LUInstanceFailover(LogicalUnit):
6429
  """Failover an instance.
6430

6431
  """
6432
  HPATH = "instance-failover"
6433
  HTYPE = constants.HTYPE_INSTANCE
6434
  REQ_BGL = False
6435

    
6436
  def CheckArguments(self):
6437
    """Check the arguments.
6438

6439
    """
6440
    self.iallocator = getattr(self.op, "iallocator", None)
6441
    self.target_node = getattr(self.op, "target_node", None)
6442

    
6443
  def ExpandNames(self):
6444
    self._ExpandAndLockInstance()
6445

    
6446
    if self.op.target_node is not None:
6447
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6448

    
6449
    self.needed_locks[locking.LEVEL_NODE] = []
6450
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6451

    
6452
    ignore_consistency = self.op.ignore_consistency
6453
    shutdown_timeout = self.op.shutdown_timeout
6454
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6455
                                       cleanup=False,
6456
                                       failover=True,
6457
                                       ignore_consistency=ignore_consistency,
6458
                                       shutdown_timeout=shutdown_timeout)
6459
    self.tasklets = [self._migrater]
6460

    
6461
  def DeclareLocks(self, level):
6462
    if level == locking.LEVEL_NODE:
6463
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6464
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6465
        if self.op.target_node is None:
6466
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6467
        else:
6468
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6469
                                                   self.op.target_node]
6470
        del self.recalculate_locks[locking.LEVEL_NODE]
6471
      else:
6472
        self._LockInstancesNodes()
6473

    
6474
  def BuildHooksEnv(self):
6475
    """Build hooks env.
6476

6477
    This runs on master, primary and secondary nodes of the instance.
6478

6479
    """
6480
    instance = self._migrater.instance
6481
    source_node = instance.primary_node
6482
    target_node = self.op.target_node
6483
    env = {
6484
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6485
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6486
      "OLD_PRIMARY": source_node,
6487
      "NEW_PRIMARY": target_node,
6488
      }
6489

    
6490
    if instance.disk_template in constants.DTS_INT_MIRROR:
6491
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6492
      env["NEW_SECONDARY"] = source_node
6493
    else:
6494
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6495

    
6496
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6497

    
6498
    return env
6499

    
6500
  def BuildHooksNodes(self):
6501
    """Build hooks nodes.
6502

6503
    """
6504
    instance = self._migrater.instance
6505
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6506
    return (nl, nl + [instance.primary_node])
6507

    
6508

    
6509
class LUInstanceMigrate(LogicalUnit):
6510
  """Migrate an instance.
6511

6512
  This is migration without shutting down, compared to the failover,
6513
  which is done with shutdown.
6514

6515
  """
6516
  HPATH = "instance-migrate"
6517
  HTYPE = constants.HTYPE_INSTANCE
6518
  REQ_BGL = False
6519

    
6520
  def ExpandNames(self):
6521
    self._ExpandAndLockInstance()
6522

    
6523
    if self.op.target_node is not None:
6524
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6525

    
6526
    self.needed_locks[locking.LEVEL_NODE] = []
6527
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6528

    
6529
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6530
                                       cleanup=self.op.cleanup,
6531
                                       failover=False,
6532
                                       fallback=self.op.allow_failover)
6533
    self.tasklets = [self._migrater]
6534

    
6535
  def DeclareLocks(self, level):
6536
    if level == locking.LEVEL_NODE:
6537
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6538
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6539
        if self.op.target_node is None:
6540
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6541
        else:
6542
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6543
                                                   self.op.target_node]
6544
        del self.recalculate_locks[locking.LEVEL_NODE]
6545
      else:
6546
        self._LockInstancesNodes()
6547

    
6548
  def BuildHooksEnv(self):
6549
    """Build hooks env.
6550

6551
    This runs on master, primary and secondary nodes of the instance.
6552

6553
    """
6554
    instance = self._migrater.instance
6555
    source_node = instance.primary_node
6556
    target_node = self.op.target_node
6557
    env = _BuildInstanceHookEnvByObject(self, instance)
6558
    env.update({
6559
      "MIGRATE_LIVE": self._migrater.live,
6560
      "MIGRATE_CLEANUP": self.op.cleanup,
6561
      "OLD_PRIMARY": source_node,
6562
      "NEW_PRIMARY": target_node,
6563
      })
6564

    
6565
    if instance.disk_template in constants.DTS_INT_MIRROR:
6566
      env["OLD_SECONDARY"] = target_node
6567
      env["NEW_SECONDARY"] = source_node
6568
    else:
6569
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6570

    
6571
    return env
6572

    
6573
  def BuildHooksNodes(self):
6574
    """Build hooks nodes.
6575

6576
    """
6577
    instance = self._migrater.instance
6578
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6579
    return (nl, nl + [instance.primary_node])
6580

    
6581

    
6582
class LUInstanceMove(LogicalUnit):
6583
  """Move an instance by data-copying.
6584

6585
  """
6586
  HPATH = "instance-move"
6587
  HTYPE = constants.HTYPE_INSTANCE
6588
  REQ_BGL = False
6589

    
6590
  def ExpandNames(self):
6591
    self._ExpandAndLockInstance()
6592
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6593
    self.op.target_node = target_node
6594
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6595
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6596

    
6597
  def DeclareLocks(self, level):
6598
    if level == locking.LEVEL_NODE:
6599
      self._LockInstancesNodes(primary_only=True)
6600

    
6601
  def BuildHooksEnv(self):
6602
    """Build hooks env.
6603

6604
    This runs on master, primary and secondary nodes of the instance.
6605

6606
    """
6607
    env = {
6608
      "TARGET_NODE": self.op.target_node,
6609
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6610
      }
6611
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6612
    return env
6613

    
6614
  def BuildHooksNodes(self):
6615
    """Build hooks nodes.
6616

6617
    """
6618
    nl = [
6619
      self.cfg.GetMasterNode(),
6620
      self.instance.primary_node,
6621
      self.op.target_node,
6622
      ]
6623
    return (nl, nl)
6624

    
6625
  def CheckPrereq(self):
6626
    """Check prerequisites.
6627

6628
    This checks that the instance is in the cluster.
6629

6630
    """
6631
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6632
    assert self.instance is not None, \
6633
      "Cannot retrieve locked instance %s" % self.op.instance_name
6634

    
6635
    node = self.cfg.GetNodeInfo(self.op.target_node)
6636
    assert node is not None, \
6637
      "Cannot retrieve locked node %s" % self.op.target_node
6638

    
6639
    self.target_node = target_node = node.name
6640

    
6641
    if target_node == instance.primary_node:
6642
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6643
                                 (instance.name, target_node),
6644
                                 errors.ECODE_STATE)
6645

    
6646
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6647

    
6648
    for idx, dsk in enumerate(instance.disks):
6649
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6650
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6651
                                   " cannot copy" % idx, errors.ECODE_STATE)
6652

    
6653
    _CheckNodeOnline(self, target_node)
6654
    _CheckNodeNotDrained(self, target_node)
6655
    _CheckNodeVmCapable(self, target_node)
6656

    
6657
    if instance.admin_up:
6658
      # check memory requirements on the secondary node
6659
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6660
                           instance.name, bep[constants.BE_MEMORY],
6661
                           instance.hypervisor)
6662
    else:
6663
      self.LogInfo("Not checking memory on the secondary node as"
6664
                   " instance will not be started")
6665

    
6666
    # check bridge existance
6667
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6668

    
6669
  def Exec(self, feedback_fn):
6670
    """Move an instance.
6671

6672
    The move is done by shutting it down on its present node, copying
6673
    the data over (slow) and starting it on the new node.
6674

6675
    """
6676
    instance = self.instance
6677

    
6678
    source_node = instance.primary_node
6679
    target_node = self.target_node
6680

    
6681
    self.LogInfo("Shutting down instance %s on source node %s",
6682
                 instance.name, source_node)
6683

    
6684
    result = self.rpc.call_instance_shutdown(source_node, instance,
6685
                                             self.op.shutdown_timeout)
6686
    msg = result.fail_msg
6687
    if msg:
6688
      if self.op.ignore_consistency:
6689
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6690
                             " Proceeding anyway. Please make sure node"
6691
                             " %s is down. Error details: %s",
6692
                             instance.name, source_node, source_node, msg)
6693
      else:
6694
        raise errors.OpExecError("Could not shutdown instance %s on"
6695
                                 " node %s: %s" %
6696
                                 (instance.name, source_node, msg))
6697

    
6698
    # create the target disks
6699
    try:
6700
      _CreateDisks(self, instance, target_node=target_node)
6701
    except errors.OpExecError:
6702
      self.LogWarning("Device creation failed, reverting...")
6703
      try:
6704
        _RemoveDisks(self, instance, target_node=target_node)
6705
      finally:
6706
        self.cfg.ReleaseDRBDMinors(instance.name)
6707
        raise
6708

    
6709
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6710

    
6711
    errs = []
6712
    # activate, get path, copy the data over
6713
    for idx, disk in enumerate(instance.disks):
6714
      self.LogInfo("Copying data for disk %d", idx)
6715
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6716
                                               instance.name, True, idx)
6717
      if result.fail_msg:
6718
        self.LogWarning("Can't assemble newly created disk %d: %s",
6719
                        idx, result.fail_msg)
6720
        errs.append(result.fail_msg)
6721
        break
6722
      dev_path = result.payload
6723
      result = self.rpc.call_blockdev_export(source_node, disk,
6724
                                             target_node, dev_path,
6725
                                             cluster_name)
6726
      if result.fail_msg:
6727
        self.LogWarning("Can't copy data over for disk %d: %s",
6728
                        idx, result.fail_msg)
6729
        errs.append(result.fail_msg)
6730
        break
6731

    
6732
    if errs:
6733
      self.LogWarning("Some disks failed to copy, aborting")
6734
      try:
6735
        _RemoveDisks(self, instance, target_node=target_node)
6736
      finally:
6737
        self.cfg.ReleaseDRBDMinors(instance.name)
6738
        raise errors.OpExecError("Errors during disk copy: %s" %
6739
                                 (",".join(errs),))
6740

    
6741
    instance.primary_node = target_node
6742
    self.cfg.Update(instance, feedback_fn)
6743

    
6744
    self.LogInfo("Removing the disks on the original node")
6745
    _RemoveDisks(self, instance, target_node=source_node)
6746

    
6747
    # Only start the instance if it's marked as up
6748
    if instance.admin_up:
6749
      self.LogInfo("Starting instance %s on node %s",
6750
                   instance.name, target_node)
6751

    
6752
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6753
                                           ignore_secondaries=True)
6754
      if not disks_ok:
6755
        _ShutdownInstanceDisks(self, instance)
6756
        raise errors.OpExecError("Can't activate the instance's disks")
6757

    
6758
      result = self.rpc.call_instance_start(target_node, instance,
6759
                                            None, None, False)
6760
      msg = result.fail_msg
6761
      if msg:
6762
        _ShutdownInstanceDisks(self, instance)
6763
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6764
                                 (instance.name, target_node, msg))
6765

    
6766

    
6767
class LUNodeMigrate(LogicalUnit):
6768
  """Migrate all instances from a node.
6769

6770
  """
6771
  HPATH = "node-migrate"
6772
  HTYPE = constants.HTYPE_NODE
6773
  REQ_BGL = False
6774

    
6775
  def CheckArguments(self):
6776
    pass
6777

    
6778
  def ExpandNames(self):
6779
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6780

    
6781
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6782
    self.needed_locks = {
6783
      locking.LEVEL_NODE: [self.op.node_name],
6784
      }
6785

    
6786
  def BuildHooksEnv(self):
6787
    """Build hooks env.
6788

6789
    This runs on the master, the primary and all the secondaries.
6790

6791
    """
6792
    return {
6793
      "NODE_NAME": self.op.node_name,
6794
      }
6795

    
6796
  def BuildHooksNodes(self):
6797
    """Build hooks nodes.
6798

6799
    """
6800
    nl = [self.cfg.GetMasterNode()]
6801
    return (nl, nl)
6802

    
6803
  def CheckPrereq(self):
6804
    pass
6805

    
6806
  def Exec(self, feedback_fn):
6807
    # Prepare jobs for migration instances
6808
    jobs = [
6809
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6810
                                 mode=self.op.mode,
6811
                                 live=self.op.live,
6812
                                 iallocator=self.op.iallocator,
6813
                                 target_node=self.op.target_node)]
6814
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6815
      ]
6816

    
6817
    # TODO: Run iallocator in this opcode and pass correct placement options to
6818
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6819
    # running the iallocator and the actual migration, a good consistency model
6820
    # will have to be found.
6821

    
6822
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)
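    # Illustrative shape of the returned value (made-up instance names):
    #   ResultWithJobs([[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
    #                   [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]])
    # i.e. one single-opcode migration job per primary instance of the node.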

    
6827

    
6828
class TLMigrateInstance(Tasklet):
6829
  """Tasklet class for instance migration.
6830

6831
  @type live: boolean
6832
  @ivar live: whether the migration will be done live or non-live;
6833
      this variable is initialized only after CheckPrereq has run
6834
  @type cleanup: boolean
6835
  @ivar cleanup: Whether we are cleaning up from a failed migration
6836
  @type iallocator: string
6837
  @ivar iallocator: The iallocator used to determine target_node
6838
  @type target_node: string
6839
  @ivar target_node: If given, the target_node to reallocate the instance to
6840
  @type failover: boolean
6841
  @ivar failover: Whether operation results in failover or migration
6842
  @type fallback: boolean
6843
  @ivar fallback: Whether fallback to failover is allowed if migration not
6844
                  possible
6845
  @type ignore_consistency: boolean
6846
  @ivar ignore_consistency: Whether we should ignore consistency between source
6847
                            and target node
6848
  @type shutdown_timeout: int
6849
  @ivar shutdown_timeout: In case of failover, the timeout for the instance shutdown
6850

6851
  """
6852
  def __init__(self, lu, instance_name, cleanup=False,
6853
               failover=False, fallback=False,
6854
               ignore_consistency=False,
6855
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6856
    """Initializes this class.
6857

6858
    """
6859
    Tasklet.__init__(self, lu)
6860

    
6861
    # Parameters
6862
    self.instance_name = instance_name
6863
    self.cleanup = cleanup
6864
    self.live = False # will be overridden later
6865
    self.failover = failover
6866
    self.fallback = fallback
6867
    self.ignore_consistency = ignore_consistency
6868
    self.shutdown_timeout = shutdown_timeout
6869

    
6870
  def CheckPrereq(self):
6871
    """Check prerequisites.
6872

6873
    This checks that the instance is in the cluster.
6874

6875
    """
6876
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6877
    instance = self.cfg.GetInstanceInfo(instance_name)
6878
    assert instance is not None
6879
    self.instance = instance
6880

    
6881
    if (not self.cleanup and not instance.admin_up and not self.failover and
6882
        self.fallback):
6883
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6884
                      " to failover")
6885
      self.failover = True
6886

    
6887
    if instance.disk_template not in constants.DTS_MIRRORED:
6888
      if self.failover:
6889
        text = "failovers"
6890
      else:
6891
        text = "migrations"
6892
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6893
                                 " %s" % (instance.disk_template, text),
6894
                                 errors.ECODE_STATE)
6895

    
6896
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6897
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6898

    
6899
      if self.lu.op.iallocator:
6900
        self._RunAllocator()
6901
      else:
6902
        # We set self.target_node as it is required by
6903
        # BuildHooksEnv
6904
        self.target_node = self.lu.op.target_node
6905

    
6906
      # self.target_node is already populated, either directly or by the
6907
      # iallocator run
6908
      target_node = self.target_node
6909
      if self.target_node == instance.primary_node:
6910
        raise errors.OpPrereqError("Cannot migrate instance %s"
6911
                                   " to its primary (%s)" %
6912
                                   (instance.name, instance.primary_node))
6913

    
6914
      if len(self.lu.tasklets) == 1:
6915
        # It is safe to release locks only when we're the only tasklet
6916
        # in the LU
6917
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6918
                      keep=[instance.primary_node, self.target_node])
6919

    
6920
    else:
6921
      secondary_nodes = instance.secondary_nodes
6922
      if not secondary_nodes:
6923
        raise errors.ConfigurationError("No secondary node but using"
6924
                                        " %s disk template" %
6925
                                        instance.disk_template)
6926
      target_node = secondary_nodes[0]
6927
      if self.lu.op.iallocator or (self.lu.op.target_node and
6928
                                   self.lu.op.target_node != target_node):
6929
        if self.failover:
6930
          text = "failed over"
6931
        else:
6932
          text = "migrated"
6933
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6934
                                   " be %s to arbitrary nodes"
6935
                                   " (neither an iallocator nor a target"
6936
                                   " node can be passed)" %
6937
                                   (instance.disk_template, text),
6938
                                   errors.ECODE_INVAL)
6939

    
6940
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6941

    
6942
    # check memory requirements on the secondary node
6943
    if not self.failover or instance.admin_up:
6944
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6945
                           instance.name, i_be[constants.BE_MEMORY],
6946
                           instance.hypervisor)
6947
    else:
6948
      self.lu.LogInfo("Not checking memory on the secondary node as"
6949
                      " instance will not be started")
6950

    
6951
    # check bridge existance
6952
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6953

    
6954
    if not self.cleanup:
6955
      _CheckNodeNotDrained(self.lu, target_node)
6956
      if not self.failover:
6957
        result = self.rpc.call_instance_migratable(instance.primary_node,
6958
                                                   instance)
6959
        if result.fail_msg and self.fallback:
6960
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6961
                          " failover")
6962
          self.failover = True
6963
        else:
6964
          result.Raise("Can't migrate, please use failover",
6965
                       prereq=True, ecode=errors.ECODE_STATE)
6966

    
6967
    assert not (self.failover and self.cleanup)
6968

    
6969
    if not self.failover:
6970
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6971
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6972
                                   " parameters are accepted",
6973
                                   errors.ECODE_INVAL)
6974
      if self.lu.op.live is not None:
6975
        if self.lu.op.live:
6976
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6977
        else:
6978
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6979
        # reset the 'live' parameter to None so that repeated
6980
        # invocations of CheckPrereq do not raise an exception
6981
        self.lu.op.live = None
6982
      elif self.lu.op.mode is None:
6983
        # read the default value from the hypervisor
6984
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6985
                                                skip_globals=False)
6986
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6987

    
6988
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6989
    else:
6990
      # Failover is never live
6991
      self.live = False
6992

    
6993
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.lu.op.iallocator,
                 utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

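  # Added commentary (not upstream): the three helpers below drive the DRBD
  # network-state transitions used by migration and cleanup -- demoting a
  # node to secondary, dropping to standalone, and re-attaching the network
  # in either single-master or dual-master mode.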
  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
7086
    """Try to cleanup after a failed migration.
7087

7088
    The cleanup is done by:
7089
      - check that the instance is running only on one node
7090
        (and update the config if needed)
7091
      - change disks on its secondary node to secondary
7092
      - wait until disks are fully synchronized
7093
      - disconnect from the network
7094
      - change disks into single-master mode
7095
      - wait again until disks are fully synchronized
7096

7097
    """
7098
    instance = self.instance
7099
    target_node = self.target_node
7100
    source_node = self.source_node
7101

    
7102
    # check running on only one node
7103
    self.feedback_fn("* checking where the instance actually runs"
7104
                     " (if this hangs, the hypervisor might be in"
7105
                     " a bad state)")
7106
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7107
    for node, result in ins_l.items():
7108
      result.Raise("Can't contact node %s" % node)
7109

    
7110
    runningon_source = instance.name in ins_l[source_node].payload
7111
    runningon_target = instance.name in ins_l[target_node].payload
7112

    
7113
    if runningon_source and runningon_target:
7114
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7115
                               " or the hypervisor is confused; you will have"
7116
                               " to ensure manually that it runs only on one"
7117
                               " and restart this operation")
7118

    
7119
    if not (runningon_source or runningon_target):
7120
      raise errors.OpExecError("Instance does not seem to be running at all;"
7121
                               " in this case it's safer to repair by"
7122
                               " running 'gnt-instance stop' to ensure disk"
7123
                               " shutdown, and then restarting it")
7124

    
7125
    if runningon_target:
7126
      # the migration has actually succeeded, we need to update the config
7127
      self.feedback_fn("* instance running on secondary node (%s),"
7128
                       " updating config" % target_node)
7129
      instance.primary_node = target_node
7130
      self.cfg.Update(instance, self.feedback_fn)
7131
      demoted_node = source_node
7132
    else:
7133
      self.feedback_fn("* instance confirmed to be running on its"
7134
                       " primary node (%s)" % source_node)
7135
      demoted_node = target_node
7136

    
7137
    if instance.disk_template in constants.DTS_INT_MIRROR:
7138
      self._EnsureSecondary(demoted_node)
7139
      try:
7140
        self._WaitUntilSync()
7141
      except errors.OpExecError:
7142
        # we ignore here errors, since if the device is standalone, it
7143
        # won't be able to sync
7144
        pass
7145
      self._GoStandalone()
7146
      self._GoReconnect(False)
7147
      self._WaitUntilSync()
7148

    
7149
    self.feedback_fn("* done")
7150

    
7151
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
7189
    """Migrate an instance.
7190

7191
    The migrate is done by:
7192
      - change the disks into dual-master mode
7193
      - wait until disks are fully synchronized again
7194
      - migrate the instance
7195
      - change disks on the new secondary node (the old primary) to secondary
7196
      - wait until disks are fully synchronized
7197
      - change disks into single-master mode
7198

7199
    """
7200
    instance = self.instance
7201
    target_node = self.target_node
7202
    source_node = self.source_node
7203

    
7204
    self.feedback_fn("* checking disk consistency between source and target")
7205
    for dev in instance.disks:
7206
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7207
        raise errors.OpExecError("Disk %s is degraded or not fully"
7208
                                 " synchronized on target node,"
7209
                                 " aborting migration" % dev.iv_name)
7210

    
7211
    # First get the migration information from the remote node
7212
    result = self.rpc.call_migration_info(source_node, instance)
7213
    msg = result.fail_msg
7214
    if msg:
7215
      log_err = ("Failed fetching source migration information from %s: %s" %
7216
                 (source_node, msg))
7217
      logging.error(log_err)
7218
      raise errors.OpExecError(log_err)
7219

    
7220
    self.migration_info = migration_info = result.payload
7221

    
7222
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7223
      # Then switch the disks to master/master mode
7224
      self._EnsureSecondary(target_node)
7225
      self._GoStandalone()
7226
      self._GoReconnect(True)
7227
      self._WaitUntilSync()
7228

    
7229
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7230
    result = self.rpc.call_accept_instance(target_node,
7231
                                           instance,
7232
                                           migration_info,
7233
                                           self.nodes_ip[target_node])
7234

    
7235
    msg = result.fail_msg
7236
    if msg:
7237
      logging.error("Instance pre-migration failed, trying to revert"
7238
                    " disk status: %s", msg)
7239
      self.feedback_fn("Pre-migration failed, aborting")
7240
      self._AbortMigration()
7241
      self._RevertDiskStatus()
7242
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7243
                               (instance.name, msg))
7244

    
7245
    self.feedback_fn("* migrating instance to %s" % target_node)
7246
    result = self.rpc.call_instance_migrate(source_node, instance,
7247
                                            self.nodes_ip[target_node],
7248
                                            self.live)
7249
    msg = result.fail_msg
7250
    if msg:
7251
      logging.error("Instance migration failed, trying to revert"
7252
                    " disk status: %s", msg)
7253
      self.feedback_fn("Migration failed, aborting")
7254
      self._AbortMigration()
7255
      self._RevertDiskStatus()
7256
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7257
                               (instance.name, msg))
7258

    
7259
    instance.primary_node = target_node
7260
    # distribute new instance config to the other nodes
7261
    self.cfg.Update(instance, self.feedback_fn)
7262

    
7263
    result = self.rpc.call_finalize_migration(target_node,
7264
                                              instance,
7265
                                              migration_info,
7266
                                              True)
7267
    msg = result.fail_msg
7268
    if msg:
7269
      logging.error("Instance migration succeeded, but finalization failed:"
7270
                    " %s", msg)
7271
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7272
                               msg)
7273

    
7274
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7275
      self._EnsureSecondary(source_node)
7276
      self._WaitUntilSync()
7277
      self._GoStandalone()
7278
      self._GoReconnect(False)
7279
      self._WaitUntilSync()
7280

    
7281
    self.feedback_fn("* done")
7282

    
7283
  def _ExecFailover(self):
7284
    """Failover an instance.
7285

7286
    The failover is done by shutting it down on its present node and
7287
    starting it on the secondary.
7288

7289
    """
7290
    instance = self.instance
7291
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7292

    
7293
    source_node = instance.primary_node
7294
    target_node = self.target_node
7295

    
7296
    if instance.admin_up:
7297
      self.feedback_fn("* checking disk consistency between source and target")
7298
      for dev in instance.disks:
7299
        # for drbd, these are drbd over lvm
7300
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7301
          if primary_node.offline:
7302
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7303
                             " target node %s" %
7304
                             (primary_node.name, dev.iv_name, target_node))
7305
          elif not self.ignore_consistency:
7306
            raise errors.OpExecError("Disk %s is degraded on target node,"
7307
                                     " aborting failover" % dev.iv_name)
7308
    else:
7309
      self.feedback_fn("* not checking disk consistency as instance is not"
7310
                       " running")
7311

    
7312
    self.feedback_fn("* shutting down instance on source node")
7313
    logging.info("Shutting down instance %s on node %s",
7314
                 instance.name, source_node)
7315

    
7316
    result = self.rpc.call_instance_shutdown(source_node, instance,
7317
                                             self.shutdown_timeout)
7318
    msg = result.fail_msg
7319
    if msg:
7320
      if self.ignore_consistency or primary_node.offline:
7321
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7322
                           " proceeding anyway; please make sure node"
7323
                           " %s is down; error details: %s",
7324
                           instance.name, source_node, source_node, msg)
7325
      else:
7326
        raise errors.OpExecError("Could not shutdown instance %s on"
7327
                                 " node %s: %s" %
7328
                                 (instance.name, source_node, msg))
7329

    
7330
    self.feedback_fn("* deactivating the instance's disks on source node")
7331
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7332
      raise errors.OpExecError("Can't shut down the instance's disks")
7333

    
7334
    instance.primary_node = target_node
7335
    # distribute new instance config to the other nodes
7336
    self.cfg.Update(instance, self.feedback_fn)
7337

    
7338
    # Only start the instance if it's marked as up
7339
    if instance.admin_up:
7340
      self.feedback_fn("* activating the instance's disks on target node %s" %
7341
                       target_node)
7342
      logging.info("Starting instance %s on node %s",
7343
                   instance.name, target_node)
7344

    
7345
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7346
                                           ignore_secondaries=True)
7347
      if not disks_ok:
7348
        _ShutdownInstanceDisks(self.lu, instance)
7349
        raise errors.OpExecError("Can't activate the instance's disks")
7350

    
7351
      self.feedback_fn("* starting the instance on the target node %s" %
7352
                       target_node)
7353
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7354
                                            False)
7355
      msg = result.fail_msg
7356
      if msg:
7357
        _ShutdownInstanceDisks(self.lu, instance)
7358
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7359
                                 (instance.name, target_node, msg))
7360

    
7361
  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


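# Note (added commentary, not upstream): _CreateBlockDev below recurses
# depth-first, so the children of a device (e.g. the data and metadata LVs
# of a DRBD8 device) are created before the device itself; force_create is
# switched on as soon as a device reports CreateOnSecondary(), which is why
# DRBD-based disks are materialized on secondary nodes as well.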
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


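# Added commentary (not upstream): _GenerateDRBD8Branch builds one DRBD8
# disk object whose two LV children hold the data (the requested size) and
# the DRBD metadata (a fixed 128 MB volume); the logical_id ties together
# both node names, the allocated network port, the per-node minors and the
# shared secret, e.g. (pnode, snode, 11000, 0, 0, "secret").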
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


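# Added commentary (not upstream): _GenerateDiskTemplate maps each disk
# template onto a list of objects.Disk trees -- plain becomes one LV per
# disk, drbd8 calls _GenerateDRBD8Branch per disk with minors allocated in
# primary/secondary pairs, file and sharedfile become LD_FILE entries under
# file_storage_dir, and blockdev adopts existing devices via
# BLOCKDEV_DRIVER_MANUAL; diskless simply returns an empty list.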
def _GenerateDiskTemplate(lu, template_name,
7499
                          instance_name, primary_node,
7500
                          secondary_nodes, disk_info,
7501
                          file_storage_dir, file_driver,
7502
                          base_index, feedback_fn):
7503
  """Generate the entire disk layout for a given template type.
7504

7505
  """
7506
  #TODO: compute space requirements
7507

    
7508
  vgname = lu.cfg.GetVGName()
7509
  disk_count = len(disk_info)
7510
  disks = []
7511
  if template_name == constants.DT_DISKLESS:
7512
    pass
7513
  elif template_name == constants.DT_PLAIN:
7514
    if len(secondary_nodes) != 0:
7515
      raise errors.ProgrammerError("Wrong template configuration")
7516

    
7517
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7518
                                      for i in range(disk_count)])
7519
    for idx, disk in enumerate(disk_info):
7520
      disk_index = idx + base_index
7521
      vg = disk.get(constants.IDISK_VG, vgname)
7522
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7523
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7524
                              size=disk[constants.IDISK_SIZE],
7525
                              logical_id=(vg, names[idx]),
7526
                              iv_name="disk/%d" % disk_index,
7527
                              mode=disk[constants.IDISK_MODE])
7528
      disks.append(disk_dev)
7529
  elif template_name == constants.DT_DRBD8:
7530
    if len(secondary_nodes) != 1:
7531
      raise errors.ProgrammerError("Wrong template configuration")
7532
    remote_node = secondary_nodes[0]
7533
    minors = lu.cfg.AllocateDRBDMinor(
7534
      [primary_node, remote_node] * len(disk_info), instance_name)
7535

    
7536
    names = []
7537
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7538
                                               for i in range(disk_count)]):
7539
      names.append(lv_prefix + "_data")
7540
      names.append(lv_prefix + "_meta")
7541
    for idx, disk in enumerate(disk_info):
7542
      disk_index = idx + base_index
7543
      data_vg = disk.get(constants.IDISK_VG, vgname)
7544
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7545
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7546
                                      disk[constants.IDISK_SIZE],
7547
                                      [data_vg, meta_vg],
7548
                                      names[idx * 2:idx * 2 + 2],
7549
                                      "disk/%d" % disk_index,
7550
                                      minors[idx * 2], minors[idx * 2 + 1])
7551
      disk_dev.mode = disk[constants.IDISK_MODE]
7552
      disks.append(disk_dev)
7553
  elif template_name == constants.DT_FILE:
7554
    if len(secondary_nodes) != 0:
7555
      raise errors.ProgrammerError("Wrong template configuration")
7556

    
7557
    opcodes.RequireFileStorage()
7558

    
7559
    for idx, disk in enumerate(disk_info):
7560
      disk_index = idx + base_index
7561
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7562
                              size=disk[constants.IDISK_SIZE],
7563
                              iv_name="disk/%d" % disk_index,
7564
                              logical_id=(file_driver,
7565
                                          "%s/disk%d" % (file_storage_dir,
7566
                                                         disk_index)),
7567
                              mode=disk[constants.IDISK_MODE])
7568
      disks.append(disk_dev)
7569
  elif template_name == constants.DT_SHARED_FILE:
7570
    if len(secondary_nodes) != 0:
7571
      raise errors.ProgrammerError("Wrong template configuration")
7572

    
7573
    opcodes.RequireSharedFileStorage()
7574

    
7575
    for idx, disk in enumerate(disk_info):
7576
      disk_index = idx + base_index
7577
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7578
                              size=disk[constants.IDISK_SIZE],
7579
                              iv_name="disk/%d" % disk_index,
7580
                              logical_id=(file_driver,
7581
                                          "%s/disk%d" % (file_storage_dir,
7582
                                                         disk_index)),
7583
                              mode=disk[constants.IDISK_MODE])
7584
      disks.append(disk_dev)
7585
  elif template_name == constants.DT_BLOCK:
7586
    if len(secondary_nodes) != 0:
7587
      raise errors.ProgrammerError("Wrong template configuration")
7588

    
7589
    for idx, disk in enumerate(disk_info):
7590
      disk_index = idx + base_index
7591
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7592
                              size=disk[constants.IDISK_SIZE],
7593
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7594
                                          disk[constants.IDISK_ADOPT]),
7595
                              iv_name="disk/%d" % disk_index,
7596
                              mode=disk[constants.IDISK_MODE])
7597
      disks.append(disk_dev)
7598

    
7599
  else:
7600
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7601
  return disks
7602

    
7603

    
7604
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


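# Added worked example (not upstream): _CalcEta assumes a constant write
# rate, so with time_taken=30s, written=1024 MiB and total_size=10240 MiB
# the average is 30/1024 s/MiB and the ETA is (10240 - 1024) * 30/1024,
# i.e. 270 seconds.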
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


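# Added commentary (not upstream): _WipeDisks below wipes each disk in
# chunks of min(MAX_WIPE_CHUNK, disk_size * MIN_WIPE_CHUNK_PERCENT / 100)
# MiB while the disk sync is paused; for example, assuming a 10% minimum
# percentage, a 50 GiB disk would use chunks of min(MAX_WIPE_CHUNK, 5120).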
def _WipeDisks(lu, instance):
7625
  """Wipes instance disks.
7626

7627
  @type lu: L{LogicalUnit}
7628
  @param lu: the logical unit on whose behalf we execute
7629
  @type instance: L{objects.Instance}
7630
  @param instance: the instance whose disks we should create
7631
  @return: the success of the wipe
7632

7633
  """
7634
  node = instance.primary_node
7635

    
7636
  for device in instance.disks:
7637
    lu.cfg.SetDiskID(device, node)
7638

    
7639
  logging.info("Pause sync of instance %s disks", instance.name)
7640
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7641

    
7642
  for idx, success in enumerate(result.payload):
7643
    if not success:
7644
      logging.warn("pause-sync of instance %s for disks %d failed",
7645
                   instance.name, idx)
7646

    
7647
  try:
7648
    for idx, device in enumerate(instance.disks):
7649
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7650
      # MAX_WIPE_CHUNK at max
7651
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7652
                            constants.MIN_WIPE_CHUNK_PERCENT)
7653
      # we _must_ make this an int, otherwise rounding errors will
7654
      # occur
7655
      wipe_chunk_size = int(wipe_chunk_size)
7656

    
7657
      lu.LogInfo("* Wiping disk %d", idx)
7658
      logging.info("Wiping disk %d for instance %s, node %s using"
7659
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7660

    
7661
      offset = 0
7662
      size = device.size
7663
      last_output = 0
7664
      start_time = time.time()
7665

    
7666
      while offset < size:
7667
        wipe_size = min(wipe_chunk_size, size - offset)
7668
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7669
                      idx, offset, wipe_size)
7670
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7671
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7672
                     (idx, offset, wipe_size))
7673
        now = time.time()
7674
        offset += wipe_size
7675
        if now - last_output >= 60:
7676
          eta = _CalcEta(now - start_time, offset, size)
7677
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7678
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7679
          last_output = now
7680
  finally:
7681
    logging.info("Resume sync of instance %s disks", instance.name)
7682

    
7683
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7684

    
7685
    for idx, success in enumerate(result.payload):
7686
      if not success:
7687
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7688
                      " look at the status and troubleshoot the issue", idx)
7689
        logging.warn("resume-sync of instance %s for disks %d failed",
7690
                     instance.name, idx)
7691

    
7692

    
7693
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7694
  """Create all disks for an instance.
7695

7696
  This abstracts away some work from AddInstance.
7697

7698
  @type lu: L{LogicalUnit}
7699
  @param lu: the logical unit on whose behalf we execute
7700
  @type instance: L{objects.Instance}
7701
  @param instance: the instance whose disks we should create
7702
  @type to_skip: list
7703
  @param to_skip: list of indices to skip
7704
  @type target_node: string
7705
  @param target_node: if passed, overrides the target node for creation
7706
  @rtype: boolean
7707
  @return: the success of the creation
7708

7709
  """
7710
  info = _GetInstanceInfoText(instance)
7711
  if target_node is None:
7712
    pnode = instance.primary_node
7713
    all_nodes = instance.all_nodes
7714
  else:
7715
    pnode = target_node
7716
    all_nodes = [pnode]
7717

    
7718
  if instance.disk_template in constants.DTS_FILEBASED:
7719
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7720
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7721

    
7722
    result.Raise("Failed to create directory '%s' on"
7723
                 " node %s" % (file_storage_dir, pnode))
7724

    
7725
  # Note: this needs to be kept in sync with adding of disks in
7726
  # LUInstanceSetParams
7727
  for idx, device in enumerate(instance.disks):
7728
    if to_skip and idx in to_skip:
7729
      continue
7730
    logging.info("Creating volume %s for instance %s",
7731
                 device.iv_name, instance.name)
7732
    #HARDCODE
7733
    for node in all_nodes:
7734
      f_create = node == pnode
7735
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7736

    
7737

    
7738
def _RemoveDisks(lu, instance, target_node=None):
7739
  """Remove all disks for an instance.
7740

7741
  This abstracts away some work from `AddInstance()` and
7742
  `RemoveInstance()`. Note that in case some of the devices couldn't
7743
  be removed, the removal will continue with the other ones (compare
7744
  with `_CreateDisks()`).
7745

7746
  @type lu: L{LogicalUnit}
7747
  @param lu: the logical unit on whose behalf we execute
7748
  @type instance: L{objects.Instance}
7749
  @param instance: the instance whose disks we should remove
7750
  @type target_node: string
7751
  @param target_node: used to override the node on which to remove the disks
7752
  @rtype: boolean
7753
  @return: the success of the removal
7754

7755
  """
7756
  logging.info("Removing block devices for instance %s", instance.name)
7757

    
7758
  all_result = True
7759
  for device in instance.disks:
7760
    if target_node:
7761
      edata = [(target_node, device)]
7762
    else:
7763
      edata = device.ComputeNodeTree(instance.primary_node)
7764
    for node, disk in edata:
7765
      lu.cfg.SetDiskID(disk, node)
7766
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7767
      if msg:
7768
        lu.LogWarning("Could not remove block device %s on node %s,"
7769
                      " continuing anyway: %s", device.iv_name, node, msg)
7770
        all_result = False
7771

    
7772
  if instance.disk_template == constants.DT_FILE:
7773
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7774
    if target_node:
7775
      tgt = target_node
7776
    else:
7777
      tgt = instance.primary_node
7778
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7779
    if result.fail_msg:
7780
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7781
                    file_storage_dir, instance.primary_node, result.fail_msg)
7782
      all_result = False
7783

    
7784
  return all_result
7785

    
7786

    
7787
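# Added worked example (not upstream): for a drbd8 instance with two
# 1024 MiB disks in volume group "xenvg", _ComputeDiskSizePerVG returns
# {"xenvg": 2 * (1024 + 128)} = {"xenvg": 2304}, i.e. the per-VG space
# needed including the 128 MiB of DRBD metadata per disk.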
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      # accumulate the space needed per volume group
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


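# Added worked example (not upstream): for template drbd8 and disks of 512
# and 1024 MiB, _ComputeDiskSize returns (512 + 128) + (1024 + 128) = 1792,
# while plain would give 512 + 1024 = 1536; file and diskless yield None,
# sharedfile and block 0, since they need no space in the volume group.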
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]


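# Added usage sketch (not upstream, call site illustrative): a typical
# caller validates the filled hypervisor parameters on all relevant nodes,
# e.g.
#   _CheckHVParams(self, nodenames, instance.hypervisor,
#                  cluster.FillHV(instance))
# Offline nodes are skipped; any other failure is raised via info.Raise().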
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
7915
  """Create an instance.
7916

7917
  """
7918
  HPATH = "instance-add"
7919
  HTYPE = constants.HTYPE_INSTANCE
7920
  REQ_BGL = False
7921

    
7922
  def CheckArguments(self):
7923
    """Check arguments.
7924

7925
    """
7926
    # do not require name_check to ease forward/backward compatibility
7927
    # for tools
7928
    if self.op.no_install and self.op.start:
7929
      self.LogInfo("No-installation mode selected, disabling startup")
7930
      self.op.start = False
7931
    # validate/normalize the instance name
7932
    self.op.instance_name = \
7933
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7934

    
7935
    if self.op.ip_check and not self.op.name_check:
7936
      # TODO: make the ip check more flexible and not depend on the name check
7937
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7938
                                 " check", errors.ECODE_INVAL)
7939

    
7940
    # check nics' parameter names
7941
    for nic in self.op.nics:
7942
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7943

    
7944
    # check disks. parameter names and consistent adopt/no-adopt strategy
7945
    has_adopt = has_no_adopt = False
7946
    for disk in self.op.disks:
7947
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7948
      if constants.IDISK_ADOPT in disk:
7949
        has_adopt = True
7950
      else:
7951
        has_no_adopt = True
7952
    if has_adopt and has_no_adopt:
7953
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7954
                                 errors.ECODE_INVAL)
7955
    if has_adopt:
7956
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7957
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7958
                                   " '%s' disk template" %
7959
                                   self.op.disk_template,
7960
                                   errors.ECODE_INVAL)
7961
      if self.op.iallocator is not None:
7962
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7963
                                   " iallocator script", errors.ECODE_INVAL)
7964
      if self.op.mode == constants.INSTANCE_IMPORT:
7965
        raise errors.OpPrereqError("Disk adoption not allowed for"
7966
                                   " instance import", errors.ECODE_INVAL)
7967
    else:
7968
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7969
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7970
                                   " but no 'adopt' parameter given" %
7971
                                   self.op.disk_template,
7972
                                   errors.ECODE_INVAL)
7973

    
7974
    self.adopt_disks = has_adopt
7975

    
7976
    # instance name verification
7977
    if self.op.name_check:
7978
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7979
      self.op.instance_name = self.hostname1.name
7980
      # used in CheckPrereq for ip ping check
7981
      self.check_ip = self.hostname1.ip
7982
    else:
7983
      self.check_ip = None
7984

    
7985
    # file storage checks
7986
    if (self.op.file_driver and
7987
        not self.op.file_driver in constants.FILE_DRIVER):
7988
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7989
                                 self.op.file_driver, errors.ECODE_INVAL)
7990

    
7991
    if self.op.disk_template == constants.DT_FILE:
7992
      opcodes.RequireFileStorage()
7993
    elif self.op.disk_template == constants.DT_SHARED_FILE:
7994
      opcodes.RequireSharedFileStorage()
7995

    
7996
    ### Node/iallocator related checks
7997
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7998

    
7999
    if self.op.pnode is not None:
8000
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8001
        if self.op.snode is None:
8002
          raise errors.OpPrereqError("The networked disk templates need"
8003
                                     " a mirror node", errors.ECODE_INVAL)
8004
      elif self.op.snode:
8005
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8006
                        " template")
8007
        self.op.snode = None
8008

    
8009
    self._cds = _GetClusterDomainSecret()
8010

    
8011
    if self.op.mode == constants.INSTANCE_IMPORT:
8012
      # On import force_variant must be True, because if we forced it at
8013
      # initial install, our only chance when importing it back is that it
8014
      # works again!
8015
      self.op.force_variant = True
8016

    
8017
      if self.op.no_install:
8018
        self.LogInfo("No-installation mode has no effect during import")
8019

    
8020
    elif self.op.mode == constants.INSTANCE_CREATE:
8021
      if self.op.os_type is None:
8022
        raise errors.OpPrereqError("No guest OS specified",
8023
                                   errors.ECODE_INVAL)
8024
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8025
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8026
                                   " installation" % self.op.os_type,
8027
                                   errors.ECODE_STATE)
8028
      if self.op.disk_template is None:
8029
        raise errors.OpPrereqError("No disk template specified",
8030
                                   errors.ECODE_INVAL)
8031

    
8032
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8033
      # Check handshake to ensure both clusters have the same domain secret
8034
      src_handshake = self.op.source_handshake
8035
      if not src_handshake:
8036
        raise errors.OpPrereqError("Missing source handshake",
8037
                                   errors.ECODE_INVAL)
8038

    
8039
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8040
                                                           src_handshake)
8041
      if errmsg:
8042
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8043
                                   errors.ECODE_INVAL)
8044

    
8045
      # Load and check source CA
8046
      self.source_x509_ca_pem = self.op.source_x509_ca
8047
      if not self.source_x509_ca_pem:
8048
        raise errors.OpPrereqError("Missing source X509 CA",
8049
                                   errors.ECODE_INVAL)
8050

    
8051
      try:
8052
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8053
                                                    self._cds)
8054
      except OpenSSL.crypto.Error, err:
8055
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8056
                                   (err, ), errors.ECODE_INVAL)
8057

    
8058
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8059
      if errcode is not None:
8060
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8061
                                   errors.ECODE_INVAL)
8062

    
8063
      self.source_x509_ca = cert
8064

    
8065
      src_instance_name = self.op.source_instance_name
8066
      if not src_instance_name:
8067
        raise errors.OpPrereqError("Missing source instance name",
8068
                                   errors.ECODE_INVAL)
8069

    
8070
      self.source_instance_name = \
8071
          netutils.GetHostname(name=src_instance_name).name
8072

    
8073
    else:
8074
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8075
                                 self.op.mode, errors.ECODE_INVAL)
8076

    
8077
  def ExpandNames(self):
8078
    """ExpandNames for CreateInstance.
8079

8080
    Figure out the right locks for instance creation.
8081

8082
    """
8083
    self.needed_locks = {}
8084

    
8085
    instance_name = self.op.instance_name
8086
    # this is just a preventive check, but someone might still add this
8087
    # instance in the meantime, and creation will fail at lock-add time
8088
    if instance_name in self.cfg.GetInstanceList():
8089
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8090
                                 instance_name, errors.ECODE_EXISTS)
8091

    
8092
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8093

    
8094
    if self.op.iallocator:
8095
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8096
    else:
8097
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8098
      nodelist = [self.op.pnode]
8099
      if self.op.snode is not None:
8100
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8101
        nodelist.append(self.op.snode)
8102
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8103

    
8104
    # in case of import lock the source node too
8105
    if self.op.mode == constants.INSTANCE_IMPORT:
8106
      src_node = self.op.src_node
8107
      src_path = self.op.src_path
8108

    
8109
      if src_path is None:
8110
        self.op.src_path = src_path = self.op.instance_name
8111

    
8112
      if src_node is None:
8113
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8114
        self.op.src_node = None
8115
        if os.path.isabs(src_path):
8116
          raise errors.OpPrereqError("Importing an instance from an absolute"
8117
                                     " path requires a source node option",
8118
                                     errors.ECODE_INVAL)
8119
      else:
8120
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8121
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8122
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8123
        if not os.path.isabs(src_path):
8124
          self.op.src_path = src_path = \
8125
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8126

    
8127
  def _RunAllocator(self):
8128
    """Run the allocator based on input opcode.
8129

8130
    """
8131
    nics = [n.ToDict() for n in self.nics]
8132
    ial = IAllocator(self.cfg, self.rpc,
8133
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8134
                     name=self.op.instance_name,
8135
                     disk_template=self.op.disk_template,
8136
                     tags=self.op.tags,
8137
                     os=self.op.os_type,
8138
                     vcpus=self.be_full[constants.BE_VCPUS],
8139
                     memory=self.be_full[constants.BE_MEMORY],
8140
                     disks=self.disks,
8141
                     nics=nics,
8142
                     hypervisor=self.op.hypervisor,
8143
                     )
8144

    
8145
    ial.Run(self.op.iallocator)
8146

    
8147
    if not ial.success:
8148
      raise errors.OpPrereqError("Can't compute nodes using"
8149
                                 " iallocator '%s': %s" %
8150
                                 (self.op.iallocator, ial.info),
8151
                                 errors.ECODE_NORES)
8152
    if len(ial.result) != ial.required_nodes:
8153
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8154
                                 " of nodes (%s), required %s" %
8155
                                 (self.op.iallocator, len(ial.result),
8156
                                  ial.required_nodes), errors.ECODE_FAULT)
8157
    self.op.pnode = ial.result[0]
8158
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8159
                 self.op.instance_name, self.op.iallocator,
8160
                 utils.CommaJoin(ial.result))
8161
    if ial.required_nodes == 2:
8162
      self.op.snode = ial.result[1]
8163

    
8164
  def BuildHooksEnv(self):
8165
    """Build hooks env.
8166

8167
    This runs on master, primary and secondary nodes of the instance.
8168

8169
    """
8170
    env = {
8171
      "ADD_MODE": self.op.mode,
8172
      }
8173
    if self.op.mode == constants.INSTANCE_IMPORT:
8174
      env["SRC_NODE"] = self.op.src_node
8175
      env["SRC_PATH"] = self.op.src_path
8176
      env["SRC_IMAGES"] = self.src_images
8177

    
8178
    env.update(_BuildInstanceHookEnv(
8179
      name=self.op.instance_name,
8180
      primary_node=self.op.pnode,
8181
      secondary_nodes=self.secondaries,
8182
      status=self.op.start,
8183
      os_type=self.op.os_type,
8184
      memory=self.be_full[constants.BE_MEMORY],
8185
      vcpus=self.be_full[constants.BE_VCPUS],
8186
      nics=_NICListToTuple(self, self.nics),
8187
      disk_template=self.op.disk_template,
8188
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8189
             for d in self.disks],
8190
      bep=self.be_full,
8191
      hvp=self.hv_full,
8192
      hypervisor_name=self.op.hypervisor,
8193
      tags=self.op.tags,
8194
    ))
8195

    
8196
    return env
8197

    
8198
  def BuildHooksNodes(self):
8199
    """Build hooks nodes.
8200

8201
    """
8202
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8203
    return nl, nl
8204

    
8205
  def _ReadExportInfo(self):
8206
    """Reads the export information from disk.
8207

8208
    It will override the opcode source node and path with the actual
8209
    information, if these two were not specified before.
8210

8211
    @return: the export information
8212

8213
    """
8214
    assert self.op.mode == constants.INSTANCE_IMPORT
8215

    
8216
    src_node = self.op.src_node
8217
    src_path = self.op.src_path
8218

    
8219
    if src_node is None:
8220
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8221
      exp_list = self.rpc.call_export_list(locked_nodes)
8222
      found = False
8223
      for node in exp_list:
8224
        if exp_list[node].fail_msg:
8225
          continue
8226
        if src_path in exp_list[node].payload:
8227
          found = True
8228
          self.op.src_node = src_node = node
8229
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8230
                                                       src_path)
8231
          break
8232
      if not found:
8233
        raise errors.OpPrereqError("No export found for relative path %s" %
8234
                                    src_path, errors.ECODE_INVAL)
8235

    
8236
    _CheckNodeOnline(self, src_node)
8237
    result = self.rpc.call_export_info(src_node, src_path)
8238
    result.Raise("No export or invalid export found in dir %s" % src_path)
8239

    
8240
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8241
    if not export_info.has_section(constants.INISECT_EXP):
8242
      raise errors.ProgrammerError("Corrupted export config",
8243
                                   errors.ECODE_ENVIRON)
8244

    
8245
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8246
    if (int(ei_version) != constants.EXPORT_VERSION):
8247
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8248
                                 (ei_version, constants.EXPORT_VERSION),
8249
                                 errors.ECODE_ENVIRON)
8250
    return export_info
8251

    
8252
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

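  # The helper below supports the identify_defaults option: any parameter
  # whose value is identical to the current cluster default is dropped from
  # the opcode, so the new instance keeps following future changes to the
  # cluster defaults instead of pinning today's values.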
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable-msg=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

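  # CheckPrereq validates everything that can be verified before any state is
  # changed: export parameters (for imports), hypervisor/backend/OS
  # parameters, NIC and disk definitions, the chosen nodes and their free
  # resources, and finally the OS availability on the target nodes.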
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

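  # Exec performs the actual creation: allocate a network port if the
  # hypervisor requires one, create (or adopt and rename) the disks, add the
  # instance to the configuration, optionally wipe and wait for the disks to
  # sync, run the OS create/import scripts and finally start the instance if
  # requested.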
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


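# The LU below only handles argument checking, locking and hook environment;
# the actual disk replacement work is delegated to the TLReplaceDisks tasklet
# defined further down.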
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if owned_groups:
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      if owned_groups != groups:
        raise errors.OpExecError("Node groups used by instance '%s' changed"
                                 " since lock was acquired, current list is %r,"
                                 " used to be '%s'" %
                                 (self.op.instance_name,
                                  utils.CommaJoin(groups),
                                  utils.CommaJoin(owned_groups)))

    return LogicalUnit.CheckPrereq(self)


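# TLReplaceDisks implements all replace-disks modes: replacing the disks in
# place on the primary or the secondary node, changing the secondary node
# (either given explicitly or chosen by an iallocator), and the automatic
# mode which only replaces disks that are detected as faulty.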
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = \
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
           for node_name in touched_nodes)

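  # Exec re-runs the second half of the prerequisite checks if the iallocator
  # decision was delayed, temporarily activates the disks of a stopped
  # instance for the duration of the operation, and then dispatches to the
  # "same node" or "new secondary" DRBD8 implementation.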
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      assert set(owned_locks) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_locks, self.node_secondary_ip.keys()))

      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
      assert list(owned_locks) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_locks) or
              (not self.early_release and not (set(owned_locks) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_locks, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9524
    """Replace a disk on the primary or secondary for DRBD 8.
9525

9526
    The algorithm for replace is quite complicated:
9527

9528
      1. for each disk to be replaced:
9529

9530
        1. create new LVs on the target node with unique names
9531
        1. detach old LVs from the drbd device
9532
        1. rename old LVs to name_replaced.<time_t>
9533
        1. rename new LVs to old LVs
9534
        1. attach the new LVs (with the old names now) to the drbd device
9535

9536
      1. wait for sync across all devices
9537

9538
      1. for each modified disk:
9539

9540
        1. remove old LVs (which have the name name_replaces.<time_t>)
9541

9542
    Failures are not very well handled.
9543

9544
    """
9545
    steps_total = 6
9546

    
9547
    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


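# Illustrative note (assumption): the LU below backs node evacuation. With an
# iallocator it can evacuate primary and/or secondary instances; with an
# explicit remote node only DRBD secondary instances can be moved (see
# CheckArguments/ExpandNames below). Assuming the standard CLI, this
# corresponds roughly to "gnt-node evacuate" with either an iallocator or a
# new secondary node; the exact flag names may differ between versions.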
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
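  # Expected shape of alloc_result, as consumed below (illustrative sketch
  # derived from the unpacking code, not a formal specification):
  #   moved:  [(instance_name, target_group, [target_node, ...]), ...]
  #   failed: [(instance_name, reason), ...]
  #   jobs:   [[serialized_opcode, ...], ...]  # one inner list per job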
  (moved, failed, jobs) = alloc_result

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
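  # Illustrative note (assumption): growing a disk is typically requested with
  # something like
  #   gnt-instance grow-disk INSTANCE 0 2g
  # The LU below grows the device on all nodes (a dry-run pass first, then the
  # real one) and optionally waits for the resync to finish.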
  HPATH = "disk-grow"
10105
  HTYPE = constants.HTYPE_INSTANCE
10106
  REQ_BGL = False
10107

    
10108
  def ExpandNames(self):
10109
    self._ExpandAndLockInstance()
10110
    self.needed_locks[locking.LEVEL_NODE] = []
10111
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10112

    
10113
  def DeclareLocks(self, level):
10114
    if level == locking.LEVEL_NODE:
10115
      self._LockInstancesNodes()
10116

    
10117
  def BuildHooksEnv(self):
10118
    """Build hooks env.
10119

10120
    This runs on the master, the primary and all the secondaries.
10121

10122
    """
10123
    env = {
10124
      "DISK": self.op.disk,
10125
      "AMOUNT": self.op.amount,
10126
      }
10127
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10128
    return env
10129

    
10130
  def BuildHooksNodes(self):
10131
    """Build hooks nodes.
10132

10133
    """
10134
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10135
    return (nl, nl)
10136

    
10137
  def CheckPrereq(self):
10138
    """Check prerequisites.
10139

10140
    This checks that the instance is in the cluster.
10141

10142
    """
10143
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10144
    assert instance is not None, \
10145
      "Cannot retrieve locked instance %s" % self.op.instance_name
10146
    nodenames = list(instance.all_nodes)
10147
    for node in nodenames:
10148
      _CheckNodeOnline(self, node)
10149

    
10150
    self.instance = instance
10151

    
10152
    if instance.disk_template not in constants.DTS_GROWABLE:
10153
      raise errors.OpPrereqError("Instance's disk layout does not support"
10154
                                 " growing", errors.ECODE_INVAL)
10155

    
10156
    self.disk = instance.FindDisk(self.op.disk)
10157

    
10158
    if instance.disk_template not in (constants.DT_FILE,
10159
                                      constants.DT_SHARED_FILE):
10160
      # TODO: check the free disk space for file, when that feature will be
10161
      # supported
10162
      _CheckNodesFreeDiskPerVG(self, nodenames,
10163
                               self.disk.ComputeGrowth(self.op.amount))
10164

    
10165
  def Exec(self, feedback_fn):
10166
    """Execute disk grow.
10167

10168
    """
10169
    instance = self.instance
10170
    disk = self.disk
10171

    
10172
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10173
    if not disks_ok:
10174
      raise errors.OpExecError("Cannot activate block device to grow")
10175

    
10176
    # First run all grow ops in dry-run mode
10177
    for node in instance.all_nodes:
10178
      self.cfg.SetDiskID(disk, node)
10179
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10180
      result.Raise("Grow request failed to node %s" % node)
10181

    
10182
    # We know that (as far as we can test) operations across different
10183
    # nodes will succeed, time to run it for real
10184
    for node in instance.all_nodes:
10185
      self.cfg.SetDiskID(disk, node)
10186
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10187
      result.Raise("Grow request failed to node %s" % node)
10188

    
10189
      # TODO: Rewrite code to work properly
10190
      # DRBD goes into sync mode for a short amount of time after executing the
10191
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10192
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10193
      # time is a work-around.
10194
      time.sleep(5)
10195

    
10196
    disk.RecordGrow(self.op.amount)
10197
    self.cfg.Update(instance, feedback_fn)
10198
    if self.op.wait_for_sync:
10199
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10200
      if disk_abort:
10201
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10202
                             " status; please check the instance")
10203
      if not instance.admin_up:
10204
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10205
    elif not instance.admin_up:
10206
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10207
                           " not supposed to be running because no wait for"
10208
                           " sync mode was requested")
10209

    
10210

    
10211
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
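  # Illustrative note (assumption): this LU collects the per-instance
  # configuration and runtime data returned by Exec below; an information
  # query such as "gnt-instance info" is the kind of caller expected to rely
  # on it.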
  REQ_BGL = False
10216

    
10217
  def ExpandNames(self):
10218
    self.needed_locks = {}
10219

    
10220
    # Use locking if requested or when non-static information is wanted
10221
    if not (self.op.static or self.op.use_locking):
10222
      self.LogWarning("Non-static data requested, locks need to be acquired")
10223
      self.op.use_locking = True
10224

    
10225
    if self.op.instances or not self.op.use_locking:
10226
      # Expand instance names right here
10227
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10228
    else:
10229
      # Will use acquired locks
10230
      self.wanted_names = None
10231

    
10232
    if self.op.use_locking:
10233
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10234

    
10235
      if self.wanted_names is None:
10236
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10237
      else:
10238
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10239

    
10240
      self.needed_locks[locking.LEVEL_NODE] = []
10241
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10242
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10243

    
10244
  def DeclareLocks(self, level):
10245
    if self.op.use_locking and level == locking.LEVEL_NODE:
10246
      self._LockInstancesNodes()
10247

    
10248
  def CheckPrereq(self):
10249
    """Check prerequisites.
10250

10251
    This only checks the optional instance list against the existing names.
10252

10253
    """
10254
    if self.wanted_names is None:
10255
      assert self.op.use_locking, "Locking was not used"
10256
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10257

    
10258
    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
10259
                             for name in self.wanted_names]
10260

    
10261
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10262
    """Returns the status of a block device
10263

10264
    """
10265
    if self.op.static or not node:
10266
      return None
10267

    
10268
    self.cfg.SetDiskID(dev, node)
10269

    
10270
    result = self.rpc.call_blockdev_find(node, dev)
10271
    if result.offline:
10272
      return None
10273

    
10274
    result.Raise("Can't compute disk status for %s" % instance_name)
10275

    
10276
    status = result.payload
10277
    if status is None:
10278
      return None
10279

    
10280
    return (status.dev_path, status.major, status.minor,
10281
            status.sync_percent, status.estimated_time,
10282
            status.is_degraded, status.ldisk_status)
10283

    
10284
  def _ComputeDiskStatus(self, instance, snode, dev):
10285
    """Compute block device status.
10286

10287
    """
10288
    if dev.dev_type in constants.LDS_DRBD:
10289
      # we change the snode then (otherwise we use the one passed in)
10290
      if dev.logical_id[0] == instance.primary_node:
10291
        snode = dev.logical_id[1]
10292
      else:
10293
        snode = dev.logical_id[0]
10294

    
10295
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10296
                                              instance.name, dev)
10297
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10298

    
10299
    if dev.children:
10300
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10301
                                        instance, snode),
10302
                         dev.children)
10303
    else:
10304
      dev_children = []
10305

    
10306
    return {
10307
      "iv_name": dev.iv_name,
10308
      "dev_type": dev.dev_type,
10309
      "logical_id": dev.logical_id,
10310
      "physical_id": dev.physical_id,
10311
      "pstatus": dev_pstatus,
10312
      "sstatus": dev_sstatus,
10313
      "children": dev_children,
10314
      "mode": dev.mode,
10315
      "size": dev.size,
10316
      }
10317

    
10318
  def Exec(self, feedback_fn):
10319
    """Gather and return data"""
10320
    result = {}
10321

    
10322
    cluster = self.cfg.GetClusterInfo()
10323

    
10324
    for instance in self.wanted_instances:
10325
      pnode = self.cfg.GetNodeInfo(instance.primary_node)
10326

    
10327
      if self.op.static or pnode.offline:
10328
        remote_state = None
10329
        if pnode.offline:
10330
          self.LogWarning("Primary node %s is marked offline, returning static"
10331
                          " information only for instance %s" %
10332
                          (pnode.name, instance.name))
10333
      else:
10334
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10335
                                                  instance.name,
10336
                                                  instance.hypervisor)
10337
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10338
        remote_info = remote_info.payload
10339
        if remote_info and "state" in remote_info:
10340
          remote_state = "up"
10341
        else:
10342
          remote_state = "down"
10343

    
10344
      if instance.admin_up:
10345
        config_state = "up"
10346
      else:
10347
        config_state = "down"
10348

    
10349
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10350
                  instance.disks)
10351

    
10352
      result[instance.name] = {
10353
        "name": instance.name,
10354
        "config_state": config_state,
10355
        "run_state": remote_state,
10356
        "pnode": instance.primary_node,
10357
        "snodes": instance.secondary_nodes,
10358
        "os": instance.os,
10359
        # this happens to be the same format used for hooks
10360
        "nics": _NICListToTuple(self, instance.nics),
10361
        "disk_template": instance.disk_template,
10362
        "disks": disks,
10363
        "hypervisor": instance.hypervisor,
10364
        "network_port": instance.network_port,
10365
        "hv_instance": instance.hvparams,
10366
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10367
        "be_instance": instance.beparams,
10368
        "be_actual": cluster.FillBE(instance),
10369
        "os_instance": instance.osparams,
10370
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10371
        "serial_no": instance.serial_no,
10372
        "mtime": instance.mtime,
10373
        "ctime": instance.ctime,
10374
        "uuid": instance.uuid,
10375
        }
10376

    
10377
    return result
10378

    
10379

    
10380
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
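  # Illustrative note (assumption): this LU handles instance modifications
  # (NIC and disk add/remove/edit, hypervisor, backend and OS parameters,
  # disk template conversion). Assuming the standard CLI, an example request
  # would be something like
  #   gnt-instance modify -B memory=2048 INSTANCE
  # The exact syntax may differ between versions.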
  HPATH = "instance-modify"
10385
  HTYPE = constants.HTYPE_INSTANCE
10386
  REQ_BGL = False
10387

    
10388
  def CheckArguments(self):
10389
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10390
            self.op.hvparams or self.op.beparams or self.op.os_name):
10391
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10392

    
10393
    if self.op.hvparams:
10394
      _CheckGlobalHvParams(self.op.hvparams)
10395

    
10396
    # Disk validation
10397
    disk_addremove = 0
10398
    for disk_op, disk_dict in self.op.disks:
10399
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10400
      if disk_op == constants.DDM_REMOVE:
10401
        disk_addremove += 1
10402
        continue
10403
      elif disk_op == constants.DDM_ADD:
10404
        disk_addremove += 1
10405
      else:
10406
        if not isinstance(disk_op, int):
10407
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10408
        if not isinstance(disk_dict, dict):
10409
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10410
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10411

    
10412
      if disk_op == constants.DDM_ADD:
10413
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10414
        if mode not in constants.DISK_ACCESS_SET:
10415
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10416
                                     errors.ECODE_INVAL)
10417
        size = disk_dict.get(constants.IDISK_SIZE, None)
10418
        if size is None:
10419
          raise errors.OpPrereqError("Required disk parameter size missing",
10420
                                     errors.ECODE_INVAL)
10421
        try:
10422
          size = int(size)
10423
        except (TypeError, ValueError), err:
10424
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10425
                                     str(err), errors.ECODE_INVAL)
10426
        disk_dict[constants.IDISK_SIZE] = size
10427
      else:
10428
        # modification of disk
10429
        if constants.IDISK_SIZE in disk_dict:
10430
          raise errors.OpPrereqError("Disk size change not possible, use"
10431
                                     " grow-disk", errors.ECODE_INVAL)
10432

    
10433
    if disk_addremove > 1:
10434
      raise errors.OpPrereqError("Only one disk add or remove operation"
10435
                                 " supported at a time", errors.ECODE_INVAL)
10436

    
10437
    if self.op.disks and self.op.disk_template is not None:
10438
      raise errors.OpPrereqError("Disk template conversion and other disk"
10439
                                 " changes not supported at the same time",
10440
                                 errors.ECODE_INVAL)
10441

    
10442
    if (self.op.disk_template and
10443
        self.op.disk_template in constants.DTS_INT_MIRROR and
10444
        self.op.remote_node is None):
10445
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10446
                                 " one requires specifying a secondary node",
10447
                                 errors.ECODE_INVAL)
10448

    
10449
    # NIC validation
10450
    nic_addremove = 0
10451
    for nic_op, nic_dict in self.op.nics:
10452
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10453
      if nic_op == constants.DDM_REMOVE:
10454
        nic_addremove += 1
10455
        continue
10456
      elif nic_op == constants.DDM_ADD:
10457
        nic_addremove += 1
10458
      else:
10459
        if not isinstance(nic_op, int):
10460
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10461
        if not isinstance(nic_dict, dict):
10462
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10463
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10464

    
10465
      # nic_dict should be a dict
10466
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10467
      if nic_ip is not None:
10468
        if nic_ip.lower() == constants.VALUE_NONE:
10469
          nic_dict[constants.INIC_IP] = None
10470
        else:
10471
          if not netutils.IPAddress.IsValid(nic_ip):
10472
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10473
                                       errors.ECODE_INVAL)
10474

    
10475
      nic_bridge = nic_dict.get("bridge", None)
10476
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10477
      if nic_bridge and nic_link:
10478
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10479
                                   " at the same time", errors.ECODE_INVAL)
10480
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10481
        nic_dict["bridge"] = None
10482
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10483
        nic_dict[constants.INIC_LINK] = None
10484

    
10485
      if nic_op == constants.DDM_ADD:
10486
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10487
        if nic_mac is None:
10488
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10489

    
10490
      if constants.INIC_MAC in nic_dict:
10491
        nic_mac = nic_dict[constants.INIC_MAC]
10492
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10493
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10494

    
10495
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10496
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10497
                                     " modifying an existing nic",
10498
                                     errors.ECODE_INVAL)
10499

    
10500
    if nic_addremove > 1:
10501
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10502
                                 " supported at a time", errors.ECODE_INVAL)
10503

    
10504
  def ExpandNames(self):
10505
    self._ExpandAndLockInstance()
10506
    self.needed_locks[locking.LEVEL_NODE] = []
10507
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10508

    
10509
  def DeclareLocks(self, level):
10510
    if level == locking.LEVEL_NODE:
10511
      self._LockInstancesNodes()
10512
      if self.op.disk_template and self.op.remote_node:
10513
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10514
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10515

    
10516
  def BuildHooksEnv(self):
10517
    """Build hooks env.
10518

10519
    This runs on the master, primary and secondaries.
10520

10521
    """
10522
    args = dict()
10523
    if constants.BE_MEMORY in self.be_new:
10524
      args["memory"] = self.be_new[constants.BE_MEMORY]
10525
    if constants.BE_VCPUS in self.be_new:
10526
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10527
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10528
    # information at all.
10529
    if self.op.nics:
10530
      args["nics"] = []
10531
      nic_override = dict(self.op.nics)
10532
      for idx, nic in enumerate(self.instance.nics):
10533
        if idx in nic_override:
10534
          this_nic_override = nic_override[idx]
10535
        else:
10536
          this_nic_override = {}
10537
        if constants.INIC_IP in this_nic_override:
10538
          ip = this_nic_override[constants.INIC_IP]
10539
        else:
10540
          ip = nic.ip
10541
        if constants.INIC_MAC in this_nic_override:
10542
          mac = this_nic_override[constants.INIC_MAC]
10543
        else:
10544
          mac = nic.mac
10545
        if idx in self.nic_pnew:
10546
          nicparams = self.nic_pnew[idx]
10547
        else:
10548
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10549
        mode = nicparams[constants.NIC_MODE]
10550
        link = nicparams[constants.NIC_LINK]
10551
        args["nics"].append((ip, mac, mode, link))
10552
      if constants.DDM_ADD in nic_override:
10553
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10554
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10555
        nicparams = self.nic_pnew[constants.DDM_ADD]
10556
        mode = nicparams[constants.NIC_MODE]
10557
        link = nicparams[constants.NIC_LINK]
10558
        args["nics"].append((ip, mac, mode, link))
10559
      elif constants.DDM_REMOVE in nic_override:
10560
        del args["nics"][-1]
10561

    
10562
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10563
    if self.op.disk_template:
10564
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10565

    
10566
    return env
10567

    
10568
  def BuildHooksNodes(self):
10569
    """Build hooks nodes.
10570

10571
    """
10572
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10573
    return (nl, nl)
10574

    
10575
  def CheckPrereq(self):
10576
    """Check prerequisites.
10577

10578
    This only checks the instance list against the existing names.
10579

10580
    """
10581
    # checking the new params on the primary/secondary nodes
10582

    
10583
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10584
    cluster = self.cluster = self.cfg.GetClusterInfo()
10585
    assert self.instance is not None, \
10586
      "Cannot retrieve locked instance %s" % self.op.instance_name
10587
    pnode = instance.primary_node
10588
    nodelist = list(instance.all_nodes)
10589

    
10590
    # OS change
10591
    if self.op.os_name and not self.op.force:
10592
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10593
                      self.op.force_variant)
10594
      instance_os = self.op.os_name
10595
    else:
10596
      instance_os = instance.os
10597

    
10598
    if self.op.disk_template:
10599
      if instance.disk_template == self.op.disk_template:
10600
        raise errors.OpPrereqError("Instance already has disk template %s" %
10601
                                   instance.disk_template, errors.ECODE_INVAL)
10602

    
10603
      if (instance.disk_template,
10604
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10605
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10606
                                   " %s to %s" % (instance.disk_template,
10607
                                                  self.op.disk_template),
10608
                                   errors.ECODE_INVAL)
10609
      _CheckInstanceDown(self, instance, "cannot change disk template")
10610
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10611
        if self.op.remote_node == pnode:
10612
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10613
                                     " as the primary node of the instance" %
10614
                                     self.op.remote_node, errors.ECODE_STATE)
10615
        _CheckNodeOnline(self, self.op.remote_node)
10616
        _CheckNodeNotDrained(self, self.op.remote_node)
10617
        # FIXME: here we assume that the old instance type is DT_PLAIN
10618
        assert instance.disk_template == constants.DT_PLAIN
10619
        disks = [{constants.IDISK_SIZE: d.size,
10620
                  constants.IDISK_VG: d.logical_id[0]}
10621
                 for d in instance.disks]
10622
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10623
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10624

    
10625
    # hvparams processing
10626
    if self.op.hvparams:
10627
      hv_type = instance.hypervisor
10628
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10629
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10630
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10631

    
10632
      # local check
10633
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10634
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10635
      self.hv_new = hv_new # the new actual values
10636
      self.hv_inst = i_hvdict # the new dict (without defaults)
10637
    else:
10638
      self.hv_new = self.hv_inst = {}
10639

    
10640
    # beparams processing
10641
    if self.op.beparams:
10642
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10643
                                   use_none=True)
10644
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10645
      be_new = cluster.SimpleFillBE(i_bedict)
10646
      self.be_new = be_new # the new actual values
10647
      self.be_inst = i_bedict # the new dict (without defaults)
10648
    else:
10649
      self.be_new = self.be_inst = {}
10650
    be_old = cluster.FillBE(instance)
10651

    
10652
    # osparams processing
10653
    if self.op.osparams:
10654
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10655
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10656
      self.os_inst = i_osdict # the new dict (without defaults)
10657
    else:
10658
      self.os_inst = {}
10659

    
10660
    self.warn = []
10661

    
10662
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10663
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10664
      mem_check_list = [pnode]
10665
      if be_new[constants.BE_AUTO_BALANCE]:
10666
        # either we changed auto_balance to yes or it was from before
10667
        mem_check_list.extend(instance.secondary_nodes)
10668
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10669
                                                  instance.hypervisor)
10670
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10671
                                         instance.hypervisor)
10672
      pninfo = nodeinfo[pnode]
10673
      msg = pninfo.fail_msg
10674
      if msg:
10675
        # Assume the primary node is unreachable and go ahead
10676
        self.warn.append("Can't get info from primary node %s: %s" %
10677
                         (pnode,  msg))
10678
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10679
        self.warn.append("Node data from primary node %s doesn't contain"
10680
                         " free memory information" % pnode)
10681
      elif instance_info.fail_msg:
10682
        self.warn.append("Can't get instance runtime information: %s" %
10683
                        instance_info.fail_msg)
10684
      else:
10685
        if instance_info.payload:
10686
          current_mem = int(instance_info.payload["memory"])
10687
        else:
10688
          # Assume instance not running
10689
          # (there is a slight race condition here, but it's not very probable,
10690
          # and we have no other way to check)
10691
          current_mem = 0
10692
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10693
                    pninfo.payload["memory_free"])
10694
        if miss_mem > 0:
10695
          raise errors.OpPrereqError("This change will prevent the instance"
10696
                                     " from starting, due to %d MB of memory"
10697
                                     " missing on its primary node" % miss_mem,
10698
                                     errors.ECODE_NORES)
10699

    
10700
      if be_new[constants.BE_AUTO_BALANCE]:
10701
        for node, nres in nodeinfo.items():
10702
          if node not in instance.secondary_nodes:
10703
            continue
10704
          nres.Raise("Can't get info from secondary node %s" % node,
10705
                     prereq=True, ecode=errors.ECODE_STATE)
10706
          if not isinstance(nres.payload.get("memory_free", None), int):
10707
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10708
                                       " memory information" % node,
10709
                                       errors.ECODE_STATE)
10710
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10711
            raise errors.OpPrereqError("This change will prevent the instance"
10712
                                       " from failover to its secondary node"
10713
                                       " %s, due to not enough memory" % node,
10714
                                       errors.ECODE_STATE)
10715

    
10716
    # NIC processing
10717
    self.nic_pnew = {}
10718
    self.nic_pinst = {}
10719
    for nic_op, nic_dict in self.op.nics:
10720
      if nic_op == constants.DDM_REMOVE:
10721
        if not instance.nics:
10722
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10723
                                     errors.ECODE_INVAL)
10724
        continue
10725
      if nic_op != constants.DDM_ADD:
10726
        # an existing nic
10727
        if not instance.nics:
10728
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10729
                                     " no NICs" % nic_op,
10730
                                     errors.ECODE_INVAL)
10731
        if nic_op < 0 or nic_op >= len(instance.nics):
10732
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10733
                                     " are 0 to %d" %
10734
                                     (nic_op, len(instance.nics) - 1),
10735
                                     errors.ECODE_INVAL)
10736
        old_nic_params = instance.nics[nic_op].nicparams
10737
        old_nic_ip = instance.nics[nic_op].ip
10738
      else:
10739
        old_nic_params = {}
10740
        old_nic_ip = None
10741

    
10742
      update_params_dict = dict([(key, nic_dict[key])
10743
                                 for key in constants.NICS_PARAMETERS
10744
                                 if key in nic_dict])
10745

    
10746
      if "bridge" in nic_dict:
10747
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10748

    
10749
      new_nic_params = _GetUpdatedParams(old_nic_params,
10750
                                         update_params_dict)
10751
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10752
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10753
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10754
      self.nic_pinst[nic_op] = new_nic_params
10755
      self.nic_pnew[nic_op] = new_filled_nic_params
10756
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10757

    
10758
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10759
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10760
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10761
        if msg:
10762
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10763
          if self.op.force:
10764
            self.warn.append(msg)
10765
          else:
10766
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10767
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10768
        if constants.INIC_IP in nic_dict:
10769
          nic_ip = nic_dict[constants.INIC_IP]
10770
        else:
10771
          nic_ip = old_nic_ip
10772
        if nic_ip is None:
10773
          raise errors.OpPrereqError("Cannot set the nic ip to None"
10774
                                     " on a routed nic", errors.ECODE_INVAL)
10775
      if constants.INIC_MAC in nic_dict:
10776
        nic_mac = nic_dict[constants.INIC_MAC]
10777
        if nic_mac is None:
10778
          raise errors.OpPrereqError("Cannot set the nic mac to None",
10779
                                     errors.ECODE_INVAL)
10780
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10781
          # otherwise generate the mac
10782
          nic_dict[constants.INIC_MAC] = \
10783
            self.cfg.GenerateMAC(self.proc.GetECId())
10784
        else:
10785
          # or validate/reserve the current one
10786
          try:
10787
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10788
          except errors.ReservationError:
10789
            raise errors.OpPrereqError("MAC address %s already in use"
10790
                                       " in cluster" % nic_mac,
10791
                                       errors.ECODE_NOTUNIQUE)
10792

    
10793
    # DISK processing
10794
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10795
      raise errors.OpPrereqError("Disk operations not supported for"
10796
                                 " diskless instances",
10797
                                 errors.ECODE_INVAL)
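    # For illustration (hypothetical values): each element of self.op.disks is
    # a (disk_op, disk_dict) pair, e.g. (constants.DDM_ADD,
    # {constants.IDISK_SIZE: 1024, constants.IDISK_MODE: "rw"}) to add a
    # 1 GiB read-write disk, or (constants.DDM_REMOVE, {}) to drop the last
    # disk.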
10798
    for disk_op, _ in self.op.disks:
10799
      if disk_op == constants.DDM_REMOVE:
10800
        if len(instance.disks) == 1:
10801
          raise errors.OpPrereqError("Cannot remove the last disk of"
10802
                                     " an instance", errors.ECODE_INVAL)
10803
        _CheckInstanceDown(self, instance, "cannot remove disks")
10804

    
10805
      if (disk_op == constants.DDM_ADD and
10806
          len(instance.disks) >= constants.MAX_DISKS):
10807
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10808
                                   " add more" % constants.MAX_DISKS,
10809
                                   errors.ECODE_STATE)
10810
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10811
        # an existing disk
10812
        if disk_op < 0 or disk_op >= len(instance.disks):
10813
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10814
                                     " are 0 to %d" %
10815
                                     (disk_op, len(instance.disks) - 1),
10816
                                     errors.ECODE_INVAL)
10817

    
10818
    return
10819

    
10820
  def _ConvertPlainToDrbd(self, feedback_fn):
10821
    """Converts an instance from plain to drbd.
10822

10823
    """
10824
    feedback_fn("Converting template to drbd")
10825
    instance = self.instance
10826
    pnode = instance.primary_node
10827
    snode = self.op.remote_node
10828

    
10829
    # create a fake disk info for _GenerateDiskTemplate
10830
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10831
                  constants.IDISK_VG: d.logical_id[0]}
10832
                 for d in instance.disks]
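    # e.g. for a single 10 GiB plain disk on volume group "xenvg" (hypothetical
    # values) this yields [{constants.IDISK_SIZE: 10240,
    #                       constants.IDISK_MODE: "rw",
    #                       constants.IDISK_VG: "xenvg"}]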
10833
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10834
                                      instance.name, pnode, [snode],
10835
                                      disk_info, None, None, 0, feedback_fn)
10836
    info = _GetInstanceInfoText(instance)
10837
    feedback_fn("Creating aditional volumes...")
10838
    # first, create the missing data and meta devices
10839
    for disk in new_disks:
10840
      # unfortunately this is... not too nice
10841
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10842
                            info, True)
10843
      for child in disk.children:
10844
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10845
    # at this stage, all new LVs have been created, we can rename the
10846
    # old ones
10847
    feedback_fn("Renaming original volumes...")
10848
    rename_list = [(o, n.children[0].logical_id)
10849
                   for (o, n) in zip(instance.disks, new_disks)]
10850
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10851
    result.Raise("Failed to rename original LVs")
10852

    
10853
    feedback_fn("Initializing DRBD devices...")
10854
    # all child devices are in place, we can now create the DRBD devices
10855
    for disk in new_disks:
10856
      for node in [pnode, snode]:
10857
        f_create = node == pnode
10858
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10859

    
10860
    # at this point, the instance has been modified
10861
    instance.disk_template = constants.DT_DRBD8
10862
    instance.disks = new_disks
10863
    self.cfg.Update(instance, feedback_fn)
10864

    
10865
    # disks are created, waiting for sync
10866
    disk_abort = not _WaitForSync(self, instance,
10867
                                  oneshot=not self.op.wait_for_sync)
10868
    if disk_abort:
10869
      raise errors.OpExecError("There are some degraded disks for"
10870
                               " this instance, please cleanup manually")
10871

    
10872
  def _ConvertDrbdToPlain(self, feedback_fn):
10873
    """Converts an instance from drbd to plain.
10874

10875
    """
10876
    instance = self.instance
10877
    assert len(instance.secondary_nodes) == 1
10878
    pnode = instance.primary_node
10879
    snode = instance.secondary_nodes[0]
10880
    feedback_fn("Converting template to plain")
10881

    
10882
    old_disks = instance.disks
10883
    new_disks = [d.children[0] for d in old_disks]
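    # for DRBD8 the children are (data LV, metadata LV), so the data LV of
    # each disk simply becomes the new plain disk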
10884

    
10885
    # copy over size and mode
10886
    for parent, child in zip(old_disks, new_disks):
10887
      child.size = parent.size
10888
      child.mode = parent.mode
10889

    
10890
    # update instance structure
10891
    instance.disks = new_disks
10892
    instance.disk_template = constants.DT_PLAIN
10893
    self.cfg.Update(instance, feedback_fn)
10894

    
10895
    feedback_fn("Removing volumes on the secondary node...")
10896
    for disk in old_disks:
10897
      self.cfg.SetDiskID(disk, snode)
10898
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10899
      if msg:
10900
        self.LogWarning("Could not remove block device %s on node %s,"
10901
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10902

    
10903
    feedback_fn("Removing unneeded volumes on the primary node...")
10904
    for idx, disk in enumerate(old_disks):
10905
      meta = disk.children[1]
10906
      self.cfg.SetDiskID(meta, pnode)
10907
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10908
      if msg:
10909
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10910
                        " continuing anyway: %s", idx, pnode, msg)
10911

    
10912
  def Exec(self, feedback_fn):
10913
    """Modifies an instance.
10914

10915
    All parameters take effect only at the next restart of the instance.
10916

10917
    """
10918
    # Process the warnings from CheckPrereq here, as we don't have a
10919
    # feedback_fn there.
10920
    for warn in self.warn:
10921
      feedback_fn("WARNING: %s" % warn)
10922

    
10923
    result = []
10924
    instance = self.instance
10925
    # disk changes
10926
    for disk_op, disk_dict in self.op.disks:
10927
      if disk_op == constants.DDM_REMOVE:
10928
        # remove the last disk
10929
        device = instance.disks.pop()
10930
        device_idx = len(instance.disks)
10931
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10932
          self.cfg.SetDiskID(disk, node)
10933
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10934
          if msg:
10935
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10936
                            " continuing anyway", device_idx, node, msg)
10937
        result.append(("disk/%d" % device_idx, "remove"))
10938
      elif disk_op == constants.DDM_ADD:
10939
        # add a new disk
10940
        if instance.disk_template in (constants.DT_FILE,
10941
                                        constants.DT_SHARED_FILE):
10942
          file_driver, file_path = instance.disks[0].logical_id
10943
          file_path = os.path.dirname(file_path)
10944
        else:
10945
          file_driver = file_path = None
10946
        disk_idx_base = len(instance.disks)
10947
        new_disk = _GenerateDiskTemplate(self,
10948
                                         instance.disk_template,
10949
                                         instance.name, instance.primary_node,
10950
                                         instance.secondary_nodes,
10951
                                         [disk_dict],
10952
                                         file_path,
10953
                                         file_driver,
10954
                                         disk_idx_base, feedback_fn)[0]
10955
        instance.disks.append(new_disk)
10956
        info = _GetInstanceInfoText(instance)
10957

    
10958
        logging.info("Creating volume %s for instance %s",
10959
                     new_disk.iv_name, instance.name)
10960
        # Note: this needs to be kept in sync with _CreateDisks
10961
        #HARDCODE
10962
        for node in instance.all_nodes:
10963
          f_create = node == instance.primary_node
10964
          try:
10965
            _CreateBlockDev(self, node, instance, new_disk,
10966
                            f_create, info, f_create)
10967
          except errors.OpExecError, err:
10968
            self.LogWarning("Failed to create volume %s (%s) on"
10969
                            " node %s: %s",
10970
                            new_disk.iv_name, new_disk, node, err)
10971
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10972
                       (new_disk.size, new_disk.mode)))
10973
      else:
10974
        # change a given disk
10975
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10976
        result.append(("disk.mode/%d" % disk_op,
10977
                       disk_dict[constants.IDISK_MODE]))
10978

    
10979
    if self.op.disk_template:
10980
      r_shut = _ShutdownInstanceDisks(self, instance)
10981
      if not r_shut:
10982
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10983
                                 " proceed with disk template conversion")
10984
      mode = (instance.disk_template, self.op.disk_template)
10985
      try:
10986
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10987
      except:
10988
        self.cfg.ReleaseDRBDMinors(instance.name)
10989
        raise
10990
      result.append(("disk_template", self.op.disk_template))
10991

    
10992
    # NIC changes
10993
    for nic_op, nic_dict in self.op.nics:
10994
      if nic_op == constants.DDM_REMOVE:
10995
        # remove the last nic
10996
        del instance.nics[-1]
10997
        result.append(("nic.%d" % len(instance.nics), "remove"))
10998
      elif nic_op == constants.DDM_ADD:
10999
        # mac and bridge should be set by now
11000
        mac = nic_dict[constants.INIC_MAC]
11001
        ip = nic_dict.get(constants.INIC_IP, None)
11002
        nicparams = self.nic_pinst[constants.DDM_ADD]
11003
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11004
        instance.nics.append(new_nic)
11005
        result.append(("nic.%d" % (len(instance.nics) - 1),
11006
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11007
                       (new_nic.mac, new_nic.ip,
11008
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11009
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11010
                       )))
11011
      else:
11012
        for key in (constants.INIC_MAC, constants.INIC_IP):
11013
          if key in nic_dict:
11014
            setattr(instance.nics[nic_op], key, nic_dict[key])
11015
        if nic_op in self.nic_pinst:
11016
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11017
        for key, val in nic_dict.iteritems():
11018
          result.append(("nic.%s/%d" % (key, nic_op), val))
11019

    
11020
    # hvparams changes
11021
    if self.op.hvparams:
11022
      instance.hvparams = self.hv_inst
11023
      for key, val in self.op.hvparams.iteritems():
11024
        result.append(("hv/%s" % key, val))
11025

    
11026
    # beparams changes
11027
    if self.op.beparams:
11028
      instance.beparams = self.be_inst
11029
      for key, val in self.op.beparams.iteritems():
11030
        result.append(("be/%s" % key, val))
11031

    
11032
    # OS change
11033
    if self.op.os_name:
11034
      instance.os = self.op.os_name
11035

    
11036
    # osparams changes
11037
    if self.op.osparams:
11038
      instance.osparams = self.os_inst
11039
      for key, val in self.op.osparams.iteritems():
11040
        result.append(("os/%s" % key, val))
11041

    
11042
    self.cfg.Update(instance, feedback_fn)
11043

    
11044
    return result
11045

    
11046
  _DISK_CONVERSIONS = {
11047
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11048
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11049
    }
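  # Only the plain<->drbd conversions above are supported; such a conversion
  # is typically requested with something like
  # "gnt-instance modify -t drbd -n node2.example.com instance1.example.com"
  # (hypothetical node and instance names).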
11050

    
11051

    
11052
class LUBackupQuery(NoHooksLU):
11053
  """Query the exports list
11054

11055
  """
11056
  REQ_BGL = False
11057

    
11058
  def ExpandNames(self):
11059
    self.needed_locks = {}
11060
    self.share_locks[locking.LEVEL_NODE] = 1
11061
    if not self.op.nodes:
11062
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11063
    else:
11064
      self.needed_locks[locking.LEVEL_NODE] = \
11065
        _GetWantedNodes(self, self.op.nodes)
11066

    
11067
  def Exec(self, feedback_fn):
11068
    """Compute the list of all the exported system images.
11069

11070
    @rtype: dict
11071
    @return: a dictionary with the structure node->(export-list)
11072
        where export-list is a list of the instances exported on
11073
        that node.
11074

11075
    """
11076
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
11077
    rpcresult = self.rpc.call_export_list(self.nodes)
11078
    result = {}
11079
    for node in rpcresult:
11080
      if rpcresult[node].fail_msg:
11081
        result[node] = False
11082
      else:
11083
        result[node] = rpcresult[node].payload
11084

    
11085
    return result
11086

    
11087

    
11088
class LUBackupPrepare(NoHooksLU):
11089
  """Prepares an instance for an export and returns useful information.
11090

11091
  """
11092
  REQ_BGL = False
11093

    
11094
  def ExpandNames(self):
11095
    self._ExpandAndLockInstance()
11096

    
11097
  def CheckPrereq(self):
11098
    """Check prerequisites.
11099

11100
    """
11101
    instance_name = self.op.instance_name
11102

    
11103
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11104
    assert self.instance is not None, \
11105
          "Cannot retrieve locked instance %s" % self.op.instance_name
11106
    _CheckNodeOnline(self, self.instance.primary_node)
11107

    
11108
    self._cds = _GetClusterDomainSecret()
11109

    
11110
  def Exec(self, feedback_fn):
11111
    """Prepares an instance for an export.
11112

11113
    """
11114
    instance = self.instance
11115

    
11116
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11117
      salt = utils.GenerateSecret(8)
11118

    
11119
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11120
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11121
                                              constants.RIE_CERT_VALIDITY)
11122
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11123

    
11124
      (name, cert_pem) = result.payload
11125

    
11126
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11127
                                             cert_pem)
11128

    
11129
      return {
11130
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11131
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11132
                          salt),
11133
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11134
        }
11135

    
11136
    return None
11137

    
11138

    
11139
class LUBackupExport(LogicalUnit):
11140
  """Export an instance to an image in the cluster.
11141

11142
  """
11143
  HPATH = "instance-export"
11144
  HTYPE = constants.HTYPE_INSTANCE
11145
  REQ_BGL = False
11146

    
11147
  def CheckArguments(self):
11148
    """Check the arguments.
11149

11150
    """
11151
    self.x509_key_name = self.op.x509_key_name
11152
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11153

    
11154
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11155
      if not self.x509_key_name:
11156
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11157
                                   errors.ECODE_INVAL)
11158

    
11159
      if not self.dest_x509_ca_pem:
11160
        raise errors.OpPrereqError("Missing destination X509 CA",
11161
                                   errors.ECODE_INVAL)
11162

    
11163
  def ExpandNames(self):
11164
    self._ExpandAndLockInstance()
11165

    
11166
    # Lock all nodes for local exports
11167
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11168
      # FIXME: lock only instance primary and destination node
11169
      #
11170
      # Sad but true, for now we have to lock all nodes, as we don't know where
11171
      # the previous export might be, and in this LU we search for it and
11172
      # remove it from its current node. In the future we could fix this by:
11173
      #  - making a tasklet to search (share-lock all), then create the
11174
      #    new one, then one to remove, after
11175
      #  - removing the removal operation altogether
11176
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11177

    
11178
  def DeclareLocks(self, level):
11179
    """Last minute lock declaration."""
11180
    # All nodes are locked anyway, so nothing to do here.
11181

    
11182
  def BuildHooksEnv(self):
11183
    """Build hooks env.
11184

11185
    This will run on the master, primary node and target node.
11186

11187
    """
11188
    env = {
11189
      "EXPORT_MODE": self.op.mode,
11190
      "EXPORT_NODE": self.op.target_node,
11191
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11192
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11193
      # TODO: Generic function for boolean env variables
11194
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11195
      }
11196

    
11197
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11198

    
11199
    return env
11200

    
11201
  def BuildHooksNodes(self):
11202
    """Build hooks nodes.
11203

11204
    """
11205
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11206

    
11207
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11208
      nl.append(self.op.target_node)
11209

    
11210
    return (nl, nl)
11211

    
11212
  def CheckPrereq(self):
11213
    """Check prerequisites.
11214

11215
    This checks that the instance and node names are valid.
11216

11217
    """
11218
    instance_name = self.op.instance_name
11219

    
11220
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11221
    assert self.instance is not None, \
11222
          "Cannot retrieve locked instance %s" % self.op.instance_name
11223
    _CheckNodeOnline(self, self.instance.primary_node)
11224

    
11225
    if (self.op.remove_instance and self.instance.admin_up and
11226
        not self.op.shutdown):
11227
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11228
                                 " down before")
11229

    
11230
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11231
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11232
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11233
      assert self.dst_node is not None
11234

    
11235
      _CheckNodeOnline(self, self.dst_node.name)
11236
      _CheckNodeNotDrained(self, self.dst_node.name)
11237

    
11238
      self._cds = None
11239
      self.dest_disk_info = None
11240
      self.dest_x509_ca = None
11241

    
11242
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11243
      self.dst_node = None
11244

    
11245
      if len(self.op.target_node) != len(self.instance.disks):
11246
        raise errors.OpPrereqError(("Received destination information for %s"
11247
                                    " disks, but instance %s has %s disks") %
11248
                                   (len(self.op.target_node), instance_name,
11249
                                    len(self.instance.disks)),
11250
                                   errors.ECODE_INVAL)
11251

    
11252
      cds = _GetClusterDomainSecret()
11253

    
11254
      # Check X509 key name
11255
      try:
11256
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11257
      except (TypeError, ValueError), err:
11258
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11259

    
11260
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11261
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11262
                                   errors.ECODE_INVAL)
11263

    
11264
      # Load and verify CA
11265
      try:
11266
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11267
      except OpenSSL.crypto.Error, err:
11268
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11269
                                   (err, ), errors.ECODE_INVAL)
11270

    
11271
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11272
      if errcode is not None:
11273
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11274
                                   (msg, ), errors.ECODE_INVAL)
11275

    
11276
      self.dest_x509_ca = cert
11277

    
11278
      # Verify target information
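      # (in remote mode, self.op.target_node carries one signed information
      # blob per instance disk, each decoding to a (host, port, magic) tuple)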
11279
      disk_info = []
11280
      for idx, disk_data in enumerate(self.op.target_node):
11281
        try:
11282
          (host, port, magic) = \
11283
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11284
        except errors.GenericError, err:
11285
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11286
                                     (idx, err), errors.ECODE_INVAL)
11287

    
11288
        disk_info.append((host, port, magic))
11289

    
11290
      assert len(disk_info) == len(self.op.target_node)
11291
      self.dest_disk_info = disk_info
11292

    
11293
    else:
11294
      raise errors.ProgrammerError("Unhandled export mode %r" %
11295
                                   self.op.mode)
11296

    
11297
    # instance disk type verification
11298
    # TODO: Implement export support for file-based disks
11299
    for disk in self.instance.disks:
11300
      if disk.dev_type == constants.LD_FILE:
11301
        raise errors.OpPrereqError("Export not supported for instances with"
11302
                                   " file-based disks", errors.ECODE_INVAL)
11303

    
11304
  def _CleanupExports(self, feedback_fn):
11305
    """Removes exports of current instance from all other nodes.
11306

11307
    If an instance in a cluster with nodes A..D was exported to node C, its
11308
    exports will be removed from the nodes A, B and D.
11309

11310
    """
11311
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11312

    
11313
    nodelist = self.cfg.GetNodeList()
11314
    nodelist.remove(self.dst_node.name)
11315

    
11316
    # on one-node clusters nodelist will be empty after the removal
11317
    # if we proceed the backup would be removed because OpBackupQuery
11318
    # substitutes an empty list with the full cluster node list.
11319
    iname = self.instance.name
11320
    if nodelist:
11321
      feedback_fn("Removing old exports for instance %s" % iname)
11322
      exportlist = self.rpc.call_export_list(nodelist)
11323
      for node in exportlist:
11324
        if exportlist[node].fail_msg:
11325
          continue
11326
        if iname in exportlist[node].payload:
11327
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11328
          if msg:
11329
            self.LogWarning("Could not remove older export for instance %s"
11330
                            " on node %s: %s", iname, node, msg)
11331

    
11332
  def Exec(self, feedback_fn):
11333
    """Export an instance to an image in the cluster.
11334

11335
    """
11336
    assert self.op.mode in constants.EXPORT_MODES
11337

    
11338
    instance = self.instance
11339
    src_node = instance.primary_node
11340

    
11341
    if self.op.shutdown:
11342
      # shutdown the instance, but not the disks
11343
      feedback_fn("Shutting down instance %s" % instance.name)
11344
      result = self.rpc.call_instance_shutdown(src_node, instance,
11345
                                               self.op.shutdown_timeout)
11346
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11347
      result.Raise("Could not shutdown instance %s on"
11348
                   " node %s" % (instance.name, src_node))
11349

    
11350
    # set the disks ID correctly since call_instance_start needs the
11351
    # correct drbd minor to create the symlinks
11352
    for disk in instance.disks:
11353
      self.cfg.SetDiskID(disk, src_node)
11354

    
11355
    activate_disks = (not instance.admin_up)
11356

    
11357
    if activate_disks:
11358
      # Activate the instance disks if we'exporting a stopped instance
11359
      feedback_fn("Activating disks for %s" % instance.name)
11360
      _StartInstanceDisks(self, instance, None)
11361

    
11362
    try:
11363
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11364
                                                     instance)
11365

    
11366
      helper.CreateSnapshots()
11367
      try:
11368
        if (self.op.shutdown and instance.admin_up and
11369
            not self.op.remove_instance):
11370
          assert not activate_disks
11371
          feedback_fn("Starting instance %s" % instance.name)
11372
          result = self.rpc.call_instance_start(src_node, instance,
11373
                                                None, None, False)
11374
          msg = result.fail_msg
11375
          if msg:
11376
            feedback_fn("Failed to start instance: %s" % msg)
11377
            _ShutdownInstanceDisks(self, instance)
11378
            raise errors.OpExecError("Could not start instance: %s" % msg)
11379

    
11380
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11381
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11382
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11383
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11384
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11385

    
11386
          (key_name, _, _) = self.x509_key_name
11387

    
11388
          dest_ca_pem = \
11389
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11390
                                            self.dest_x509_ca)
11391

    
11392
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11393
                                                     key_name, dest_ca_pem,
11394
                                                     timeouts)
11395
      finally:
11396
        helper.Cleanup()
11397

    
11398
      # Check for backwards compatibility
11399
      assert len(dresults) == len(instance.disks)
11400
      assert compat.all(isinstance(i, bool) for i in dresults), \
11401
             "Not all results are boolean: %r" % dresults
11402

    
11403
    finally:
11404
      if activate_disks:
11405
        feedback_fn("Deactivating disks for %s" % instance.name)
11406
        _ShutdownInstanceDisks(self, instance)
11407

    
11408
    if not (compat.all(dresults) and fin_resu):
11409
      failures = []
11410
      if not fin_resu:
11411
        failures.append("export finalization")
11412
      if not compat.all(dresults):
11413
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11414
                               if not dsk)
11415
        failures.append("disk export: disk(s) %s" % fdsk)
11416

    
11417
      raise errors.OpExecError("Export failed, errors in %s" %
11418
                               utils.CommaJoin(failures))
11419

    
11420
    # At this point, the export was successful, we can cleanup/finish
11421

    
11422
    # Remove instance if requested
11423
    if self.op.remove_instance:
11424
      feedback_fn("Removing instance %s" % instance.name)
11425
      _RemoveInstance(self, feedback_fn, instance,
11426
                      self.op.ignore_remove_failures)
11427

    
11428
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11429
      self._CleanupExports(feedback_fn)
11430

    
11431
    return fin_resu, dresults
11432

    
11433

    
11434
class LUBackupRemove(NoHooksLU):
11435
  """Remove exports related to the named instance.
11436

11437
  """
11438
  REQ_BGL = False
11439

    
11440
  def ExpandNames(self):
11441
    self.needed_locks = {}
11442
    # We need all nodes to be locked in order for RemoveExport to work, but we
11443
    # don't need to lock the instance itself, as nothing will happen to it (and
11444
    # we can remove exports also for a removed instance)
11445
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11446

    
11447
  def Exec(self, feedback_fn):
11448
    """Remove any export.
11449

11450
    """
11451
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11452
    # If the instance was not found we'll try with the name that was passed in.
11453
    # This will only work if it was an FQDN, though.
11454
    fqdn_warn = False
11455
    if not instance_name:
11456
      fqdn_warn = True
11457
      instance_name = self.op.instance_name
11458

    
11459
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11460
    exportlist = self.rpc.call_export_list(locked_nodes)
11461
    found = False
11462
    for node in exportlist:
11463
      msg = exportlist[node].fail_msg
11464
      if msg:
11465
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11466
        continue
11467
      if instance_name in exportlist[node].payload:
11468
        found = True
11469
        result = self.rpc.call_export_remove(node, instance_name)
11470
        msg = result.fail_msg
11471
        if msg:
11472
          logging.error("Could not remove export for instance %s"
11473
                        " on node %s: %s", instance_name, node, msg)
11474

    
11475
    if fqdn_warn and not found:
11476
      feedback_fn("Export not found. If trying to remove an export belonging"
11477
                  " to a deleted instance please use its Fully Qualified"
11478
                  " Domain Name.")
11479

    
11480

    
11481
class LUGroupAdd(LogicalUnit):
11482
  """Logical unit for creating node groups.
11483

11484
  """
11485
  HPATH = "group-add"
11486
  HTYPE = constants.HTYPE_GROUP
11487
  REQ_BGL = False
11488

    
11489
  def ExpandNames(self):
11490
    # We need the new group's UUID here so that we can create and acquire the
11491
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11492
    # that it should not check whether the UUID exists in the configuration.
11493
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11494
    self.needed_locks = {}
11495
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11496

    
11497
  def CheckPrereq(self):
11498
    """Check prerequisites.
11499

11500
    This checks that the given group name is not an existing node group
11501
    already.
11502

11503
    """
11504
    try:
11505
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11506
    except errors.OpPrereqError:
11507
      pass
11508
    else:
11509
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11510
                                 " node group (UUID: %s)" %
11511
                                 (self.op.group_name, existing_uuid),
11512
                                 errors.ECODE_EXISTS)
11513

    
11514
    if self.op.ndparams:
11515
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
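      # ndparams could be e.g. {constants.ND_OOB_PROGRAM: "/usr/sbin/my-oob"}
      # (hypothetical path); ForceDictType only checks the value types here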
11516

    
11517
  def BuildHooksEnv(self):
11518
    """Build hooks env.
11519

11520
    """
11521
    return {
11522
      "GROUP_NAME": self.op.group_name,
11523
      }
11524

    
11525
  def BuildHooksNodes(self):
11526
    """Build hooks nodes.
11527

11528
    """
11529
    mn = self.cfg.GetMasterNode()
11530
    return ([mn], [mn])
11531

    
11532
  def Exec(self, feedback_fn):
11533
    """Add the node group to the cluster.
11534

11535
    """
11536
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11537
                                  uuid=self.group_uuid,
11538
                                  alloc_policy=self.op.alloc_policy,
11539
                                  ndparams=self.op.ndparams)
11540

    
11541
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11542
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11543

    
11544

    
11545
class LUGroupAssignNodes(NoHooksLU):
11546
  """Logical unit for assigning nodes to groups.
11547

11548
  """
11549
  REQ_BGL = False
11550

    
11551
  def ExpandNames(self):
11552
    # These raise errors.OpPrereqError on their own:
11553
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11554
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11555

    
11556
    # We want to lock all the affected nodes and groups. We have readily
11557
    # available the list of nodes, and the *destination* group. To gather the
11558
    # list of "source" groups, we need to fetch node information later on.
11559
    self.needed_locks = {
11560
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11561
      locking.LEVEL_NODE: self.op.nodes,
11562
      }
11563

    
11564
  def DeclareLocks(self, level):
11565
    if level == locking.LEVEL_NODEGROUP:
11566
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11567

    
11568
      # Try to get all affected nodes' groups without having the group or node
11569
      # lock yet. Needs verification later in the code flow.
11570
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11571

    
11572
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11573

    
11574
  def CheckPrereq(self):
11575
    """Check prerequisites.
11576

11577
    """
11578
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11579
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11580
            frozenset(self.op.nodes))
11581

    
11582
    expected_locks = (set([self.group_uuid]) |
11583
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11584
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11585
    if actual_locks != expected_locks:
11586
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11587
                               " current groups are '%s', used to be '%s'" %
11588
                               (utils.CommaJoin(expected_locks),
11589
                                utils.CommaJoin(actual_locks)))
11590

    
11591
    self.node_data = self.cfg.GetAllNodesInfo()
11592
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11593
    instance_data = self.cfg.GetAllInstancesInfo()
11594

    
11595
    if self.group is None:
11596
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11597
                               (self.op.group_name, self.group_uuid))
11598

    
11599
    (new_splits, previous_splits) = \
11600
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11601
                                             for node in self.op.nodes],
11602
                                            self.node_data, instance_data)
11603

    
11604
    if new_splits:
11605
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11606

    
11607
      if not self.op.force:
11608
        raise errors.OpExecError("The following instances get split by this"
11609
                                 " change and --force was not given: %s" %
11610
                                 fmt_new_splits)
11611
      else:
11612
        self.LogWarning("This operation will split the following instances: %s",
11613
                        fmt_new_splits)
11614

    
11615
        if previous_splits:
11616
          self.LogWarning("In addition, these already-split instances continue"
11617
                          " to be split across groups: %s",
11618
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11619

    
11620
  def Exec(self, feedback_fn):
11621
    """Assign nodes to a new group.
11622

11623
    """
11624
    for node in self.op.nodes:
11625
      self.node_data[node].group = self.group_uuid
11626

    
11627
    # FIXME: Depends on side-effects of modifying the result of
11628
    # C{cfg.GetAllNodesInfo}
11629

    
11630
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11631

    
11632
  @staticmethod
11633
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11634
    """Check for split instances after a node assignment.
11635

11636
    This method considers a series of node assignments as an atomic operation,
11637
    and returns information about split instances after applying the set of
11638
    changes.
11639

11640
    In particular, it returns information about newly split instances, and
11641
    instances that were already split, and remain so after the change.
11642

11643
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11644
    considered.
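
    For example (hypothetical names): with C{changes = [("node3", "grp-B")]}
    and a DRBD instance whose two nodes C{node3} and C{node4} currently both
    belong to group C{grp-A}, the instance is reported as newly split.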
11645

11646
    @type changes: list of (node_name, new_group_uuid) pairs.
11647
    @param changes: list of node assignments to consider.
11648
    @param node_data: a dict with data for all nodes
11649
    @param instance_data: a dict with all instances to consider
11650
    @rtype: a two-tuple
11651
    @return: a list of instances that were previously okay and end up split as a
11652
      consequence of this change, and a list of instances that were previously
11653
      split and this change does not fix.
11654

11655
    """
11656
    changed_nodes = dict((node, group) for node, group in changes
11657
                         if node_data[node].group != group)
11658

    
11659
    all_split_instances = set()
11660
    previously_split_instances = set()
11661

    
11662
    def InstanceNodes(instance):
11663
      return [instance.primary_node] + list(instance.secondary_nodes)
11664

    
11665
    for inst in instance_data.values():
11666
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11667
        continue
11668

    
11669
      instance_nodes = InstanceNodes(inst)
11670

    
11671
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11672
        previously_split_instances.add(inst.name)
11673

    
11674
      if len(set(changed_nodes.get(node, node_data[node].group)
11675
                 for node in instance_nodes)) > 1:
11676
        all_split_instances.add(inst.name)
11677

    
11678
    return (list(all_split_instances - previously_split_instances),
11679
            list(previously_split_instances & all_split_instances))
11680

    
11681

    
11682
class _GroupQuery(_QueryBase):
11683
  FIELDS = query.GROUP_FIELDS
11684

    
11685
  def ExpandNames(self, lu):
11686
    lu.needed_locks = {}
11687

    
11688
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11689
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11690

    
11691
    if not self.names:
11692
      self.wanted = [name_to_uuid[name]
11693
                     for name in utils.NiceSort(name_to_uuid.keys())]
11694
    else:
11695
      # Accept names to be either names or UUIDs.
11696
      missing = []
11697
      self.wanted = []
11698
      all_uuid = frozenset(self._all_groups.keys())
11699

    
11700
      for name in self.names:
11701
        if name in all_uuid:
11702
          self.wanted.append(name)
11703
        elif name in name_to_uuid:
11704
          self.wanted.append(name_to_uuid[name])
11705
        else:
11706
          missing.append(name)
11707

    
11708
      if missing:
11709
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11710
                                   utils.CommaJoin(missing),
11711
                                   errors.ECODE_NOENT)
11712

    
11713
  def DeclareLocks(self, lu, level):
11714
    pass
11715

    
11716
  def _GetQueryData(self, lu):
11717
    """Computes the list of node groups and their attributes.
11718

11719
    """
11720
    do_nodes = query.GQ_NODE in self.requested_data
11721
    do_instances = query.GQ_INST in self.requested_data
11722

    
11723
    group_to_nodes = None
11724
    group_to_instances = None
11725

    
11726
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11727
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11728
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11729
    # instance->node. Hence, we will need to process nodes even if we only need
11730
    # instance information.
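    # e.g. group_to_nodes == {"<group uuid>": ["node1", "node2"]} and
    # group_to_instances == {"<group uuid>": ["instance1"]} once the loops
    # below have run (hypothetical names)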
11731
    if do_nodes or do_instances:
11732
      all_nodes = lu.cfg.GetAllNodesInfo()
11733
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11734
      node_to_group = {}
11735

    
11736
      for node in all_nodes.values():
11737
        if node.group in group_to_nodes:
11738
          group_to_nodes[node.group].append(node.name)
11739
          node_to_group[node.name] = node.group
11740

    
11741
      if do_instances:
11742
        all_instances = lu.cfg.GetAllInstancesInfo()
11743
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11744

    
11745
        for instance in all_instances.values():
11746
          node = instance.primary_node
11747
          if node in node_to_group:
11748
            group_to_instances[node_to_group[node]].append(instance.name)
11749

    
11750
        if not do_nodes:
11751
          # Do not pass on node information if it was not requested.
11752
          group_to_nodes = None
11753

    
11754
    return query.GroupQueryData([self._all_groups[uuid]
11755
                                 for uuid in self.wanted],
11756
                                group_to_nodes, group_to_instances)
11757

    
11758

    
11759
class LUGroupQuery(NoHooksLU):
11760
  """Logical unit for querying node groups.
11761

11762
  """
11763
  REQ_BGL = False
11764

    
11765
  def CheckArguments(self):
11766
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11767
                          self.op.output_fields, False)
11768

    
11769
  def ExpandNames(self):
11770
    self.gq.ExpandNames(self)
11771

    
11772
  def Exec(self, feedback_fn):
11773
    return self.gq.OldStyleQuery(self)
11774

    
11775

    
11776
class LUGroupSetParams(LogicalUnit):
11777
  """Modifies the parameters of a node group.
11778

11779
  """
11780
  HPATH = "group-modify"
11781
  HTYPE = constants.HTYPE_GROUP
11782
  REQ_BGL = False
11783

    
11784
  def CheckArguments(self):
11785
    all_changes = [
11786
      self.op.ndparams,
11787
      self.op.alloc_policy,
11788
      ]
11789

    
11790
    if all_changes.count(None) == len(all_changes):
11791
      raise errors.OpPrereqError("Please pass at least one modification",
11792
                                 errors.ECODE_INVAL)
11793

    
11794
  def ExpandNames(self):
11795
    # This raises errors.OpPrereqError on its own:
11796
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11797

    
11798
    self.needed_locks = {
11799
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11800
      }
11801

    
11802
  def CheckPrereq(self):
11803
    """Check prerequisites.
11804

11805
    """
11806
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11807

    
11808
    if self.group is None:
11809
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11810
                               (self.op.group_name, self.group_uuid))
11811

    
11812
    if self.op.ndparams:
11813
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11814
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11815
      self.new_ndparams = new_ndparams
11816

    
11817
  def BuildHooksEnv(self):
11818
    """Build hooks env.
11819

11820
    """
11821
    return {
11822
      "GROUP_NAME": self.op.group_name,
11823
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11824
      }
11825

    
11826
  def BuildHooksNodes(self):
11827
    """Build hooks nodes.
11828

11829
    """
11830
    mn = self.cfg.GetMasterNode()
11831
    return ([mn], [mn])
11832

    
11833
  def Exec(self, feedback_fn):
11834
    """Modifies the node group.
11835

11836
    """
11837
    result = []
11838

    
11839
    if self.op.ndparams:
11840
      self.group.ndparams = self.new_ndparams
11841
      result.append(("ndparams", str(self.group.ndparams)))
11842

    
11843
    if self.op.alloc_policy:
11844
      self.group.alloc_policy = self.op.alloc_policy
11845

    
11846
    self.cfg.Update(self.group, feedback_fn)
11847
    return result
11848

    
11849

    
11850

    
11851
class LUGroupRemove(LogicalUnit):
11852
  HPATH = "group-remove"
11853
  HTYPE = constants.HTYPE_GROUP
11854
  REQ_BGL = False
11855

    
11856
  def ExpandNames(self):
11857
    # This raises errors.OpPrereqError on its own:
11858
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11859
    self.needed_locks = {
11860
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11861
      }
11862

    
11863
  def CheckPrereq(self):
11864
    """Check prerequisites.
11865

11866
    This checks that the given group name exists as a node group, that it is
11867
    empty (i.e., contains no nodes), and that it is not the last group of the
11868
    cluster.
11869

11870
    """
11871
    # Verify that the group is empty.
11872
    group_nodes = [node.name
11873
                   for node in self.cfg.GetAllNodesInfo().values()
11874
                   if node.group == self.group_uuid]
11875

    
11876
    if group_nodes:
11877
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11878
                                 " nodes: %s" %
11879
                                 (self.op.group_name,
11880
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11881
                                 errors.ECODE_STATE)
11882

    
11883
    # Verify the cluster would not be left group-less.
11884
    if len(self.cfg.GetNodeGroupList()) == 1:
11885
      raise errors.OpPrereqError("Group '%s' is the only group,"
11886
                                 " cannot be removed" %
11887
                                 self.op.group_name,
11888
                                 errors.ECODE_STATE)
11889

    
11890
  def BuildHooksEnv(self):
11891
    """Build hooks env.
11892

11893
    """
11894
    return {
11895
      "GROUP_NAME": self.op.group_name,
11896
      }
11897

    
11898
  def BuildHooksNodes(self):
11899
    """Build hooks nodes.
11900

11901
    """
11902
    mn = self.cfg.GetMasterNode()
11903
    return ([mn], [mn])
11904

    
11905
  def Exec(self, feedback_fn):
11906
    """Remove the node group.
11907

11908
    """
11909
    try:
11910
      self.cfg.RemoveNodeGroup(self.group_uuid)
11911
    except errors.ConfigurationError:
11912
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11913
                               (self.op.group_name, self.group_uuid))
11914

    
11915
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11916

    
11917

    
11918
class LUGroupRename(LogicalUnit):
11919
  HPATH = "group-rename"
11920
  HTYPE = constants.HTYPE_GROUP
11921
  REQ_BGL = False
11922

    
11923
  def ExpandNames(self):
11924
    # This raises errors.OpPrereqError on its own:
11925
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11926

    
11927
    self.needed_locks = {
11928
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11929
      }
11930

    
11931
  def CheckPrereq(self):
11932
    """Check prerequisites.
11933

11934
    Ensures requested new name is not yet used.
11935

11936
    """
11937
    try:
11938
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11939
    except errors.OpPrereqError:
11940
      pass
11941
    else:
11942
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11943
                                 " node group (UUID: %s)" %
11944
                                 (self.op.new_name, new_name_uuid),
11945
                                 errors.ECODE_EXISTS)
11946

    
11947
  def BuildHooksEnv(self):
11948
    """Build hooks env.
11949

11950
    """
11951
    return {
11952
      "OLD_NAME": self.op.group_name,
11953
      "NEW_NAME": self.op.new_name,
11954
      }
11955

    
11956
  def BuildHooksNodes(self):
11957
    """Build hooks nodes.
11958

11959
    """
11960
    mn = self.cfg.GetMasterNode()
11961

    
11962
    all_nodes = self.cfg.GetAllNodesInfo()
11963
    all_nodes.pop(mn, None)
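    # the master is dropped here so that the extend below does not list it
    # twice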
11964

    
11965
    run_nodes = [mn]
11966
    run_nodes.extend(node.name for node in all_nodes.values()
11967
                     if node.group == self.group_uuid)
11968

    
11969
    return (run_nodes, run_nodes)
11970

    
11971
  def Exec(self, feedback_fn):
11972
    """Rename the node group.
11973

11974
    """
11975
    group = self.cfg.GetNodeGroup(self.group_uuid)
11976

    
11977
    if group is None:
11978
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11979
                               (self.op.group_name, self.group_uuid))
11980

    
11981
    group.name = self.op.new_name
11982
    self.cfg.Update(group, feedback_fn)
11983

    
11984
    return self.op.new_name
11985

    
11986

    
11987
class LUGroupEvacuate(LogicalUnit):
11988
  HPATH = "group-evacuate"
11989
  HTYPE = constants.HTYPE_GROUP
11990
  REQ_BGL = False
11991

    
11992
  def ExpandNames(self):
11993
    # This raises errors.OpPrereqError on its own:
11994
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11995

    
11996
    if self.op.target_groups:
11997
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11998
                                  self.op.target_groups)
11999
    else:
12000
      self.req_target_uuids = []
12001

    
12002
    if self.group_uuid in self.req_target_uuids:
12003
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12004
                                 " as a target group (targets are %s)" %
12005
                                 (self.group_uuid,
12006
                                  utils.CommaJoin(self.req_target_uuids)),
12007
                                 errors.ECODE_INVAL)
12008

    
12009
    if not self.op.iallocator:
12010
      # Use default iallocator
12011
      self.op.iallocator = self.cfg.GetDefaultIAllocator()
12012

    
12013
    if not self.op.iallocator:
12014
      raise errors.OpPrereqError("No iallocator was specified, neither in the"
12015
                                 " opcode nor as a cluster-wide default",
12016
                                 errors.ECODE_INVAL)
12017

    
12018
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
12019
    self.needed_locks = {
12020
      locking.LEVEL_INSTANCE: [],
12021
      locking.LEVEL_NODEGROUP: [],
12022
      locking.LEVEL_NODE: [],
12023
      }
12024

    
12025
  def DeclareLocks(self, level):
12026
    if level == locking.LEVEL_INSTANCE:
12027
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12028

    
12029
      # Lock instances optimistically, needs verification once node and group
12030
      # locks have been acquired
12031
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12032
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12033

    
12034
    elif level == locking.LEVEL_NODEGROUP:
12035
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12036

    
12037
      if self.req_target_uuids:
12038
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12039

    
12040
        # Lock all groups used by instances optimistically; this requires going
12041
        # via the node before it's locked, requiring verification later on
12042
        lock_groups.update(group_uuid
12043
                           for instance_name in
12044
                             self.glm.list_owned(locking.LEVEL_INSTANCE)
12045
                           for group_uuid in
12046
                             self.cfg.GetInstanceNodeGroups(instance_name))
12047
      else:
12048
        # No target groups, need to lock all of them
12049
        lock_groups = locking.ALL_SET
12050

    
12051
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12052

    
12053
    elif level == locking.LEVEL_NODE:
12054
      # This will only lock the nodes in the group to be evacuated which
12055
      # contain actual instances
12056
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12057
      self._LockInstancesNodes()
12058

    
12059
      # Lock all nodes in group to be evacuated
12060
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12061
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
    if owned_instances != wanted_instances:
      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
                                 " changed since locks were acquired, wanted"
                                 " %s, have %s; retry the operation" %
                                 (self.group_uuid,
                                  utils.CommaJoin(wanted_instances),
                                  utils.CommaJoin(owned_instances)),
                                 errors.ECODE_STATE)

    # Get instance information
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
                          for name in owned_instances)

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
      if not owned_groups.issuperset(inst_groups):
        raise errors.OpPrereqError("Instance %s's node groups changed since"
                                   " locks were acquired, current groups"
                                   " are '%s', owning groups '%s'; retry the"
                                   " operation" %
                                   (instance_name,
                                    utils.CommaJoin(inst_groups),
                                    utils.CommaJoin(owned_groups)),
                                   errors.ECODE_STATE)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpExecError("There are no possible target groups")

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node names to config-derived node attribute dicts

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute dynamic (runtime) node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
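            # If the instance is configured with more memory than it is
            # currently using, count the difference as used on this node,
            # since the instance can grow back to its configured size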
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

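    # Internally mirrored disk templates (e.g. DRBD) need both a primary
    # and a secondary node; all other templates need a single node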
    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

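  # A node-evacuate or change-group result is a (moved, failed, jobs)
  # triple; the definitions below validate each of the three parts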
  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

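  # Maps an allocator mode to a tuple of (request-building method, list of
  # (parameter name, validator) pairs, result validator); __init__ and
  # _BuildInputData use these to check the input and output data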
  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
13018

    
13019
    for node in nodes:
13020
      try:
13021
        group_uuid = node2group[node]
13022
      except KeyError:
13023
        # Ignore unknown node
13024
        pass
13025
      else:
13026
        try:
13027
          group = groups[group_uuid]
13028
        except KeyError:
13029
          # Can't find group, let's use UUID
13030
          group_name = group_uuid
13031
        else:
13032
          group_name = group["name"]
13033

    
13034
        result.add(group_name)
13035

    
13036
    return sorted(result)
13037

    
13038

    
13039
class LUTestAllocator(NoHooksLU):
13040
  """Run allocator tests.
13041

13042
  This LU runs the allocator tests
13043

13044
  """
13045
  def CheckPrereq(self):
13046
    """Check prerequisites.
13047

13048
    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

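# Every query resource that can be accessed via an opcode must have an
# implementation registered above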
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)