root / lib / cmdlib.py @ 3b61ee44


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable-msg=W0611
64

    
65

    
66
def _SupportsOob(cfg, node):
67
  """Tells if node supports OOB.
68

69
  @type cfg: L{config.ConfigWriter}
70
  @param cfg: The cluster configuration
71
  @type node: L{objects.Node}
72
  @param node: The node
73
  @return: The OOB script if supported or an empty string otherwise
74

75
  """
76
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
77

    
78

    
79
class ResultWithJobs:
80
  """Data container for LU results with jobs.
81

82
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
83
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
84
  contained in the C{jobs} attribute and include the job IDs in the opcode
85
  result.
86

87
  """
88
  def __init__(self, jobs, **kwargs):
89
    """Initializes this class.
90

91
    Additional return values can be specified as keyword arguments.
92

93
    @type jobs: list of lists of L{opcodes.OpCode}
94
    @param jobs: A list of lists of opcode objects
95

96
    """
97
    self.jobs = jobs
98
    self.other = kwargs
99

    
100

    
101
class LogicalUnit(object):
102
  """Logical Unit base class.
103

104
  Subclasses must follow these rules:
105
    - implement ExpandNames
106
    - implement CheckPrereq (except when tasklets are used)
107
    - implement Exec (except when tasklets are used)
108
    - implement BuildHooksEnv
109
    - implement BuildHooksNodes
110
    - redefine HPATH and HTYPE
111
    - optionally redefine their run requirements:
112
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
113

114
  Note that all commands require root permissions.
115

116
  @ivar dry_run_result: the value (if any) that will be returned to the caller
117
      in dry-run mode (signalled by opcode dry_run parameter)
118

119
  """
120
  HPATH = None
121
  HTYPE = None
122
  REQ_BGL = True
123

    
124
  def __init__(self, processor, op, context, rpc):
125
    """Constructor for LogicalUnit.
126

127
    This needs to be overridden in derived classes in order to check op
128
    validity.
129

130
    """
131
    self.proc = processor
132
    self.op = op
133
    self.cfg = context.cfg
134
    self.glm = context.glm
135
    self.context = context
136
    self.rpc = rpc
137
    # Dicts used to declare locking needs to mcpu
138
    self.needed_locks = None
139
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
140
    self.add_locks = {}
141
    self.remove_locks = {}
142
    # Used to force good behavior when calling helper functions
143
    self.recalculate_locks = {}
144
    # logging
145
    self.Log = processor.Log # pylint: disable-msg=C0103
146
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
147
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
148
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
149
    # support for dry-run
150
    self.dry_run_result = None
151
    # support for generic debug attribute
152
    if (not hasattr(self.op, "debug_level") or
153
        not isinstance(self.op.debug_level, int)):
154
      self.op.debug_level = 0
155

    
156
    # Tasklets
157
    self.tasklets = None
158

    
159
    # Validate opcode parameters and set defaults
160
    self.op.Validate(True)
161

    
162
    self.CheckArguments()
163

    
164
  def CheckArguments(self):
165
    """Check syntactic validity for the opcode arguments.
166

167
    This method is for doing a simple syntactic check and ensuring the
168
    validity of opcode parameters, without any cluster-related
169
    checks. While the same can be accomplished in ExpandNames and/or
170
    CheckPrereq, doing these separately is better because:
171

172
      - ExpandNames is left as purely a lock-related function
173
      - CheckPrereq is run after we have acquired locks (and possibly
174
        waited for them)
175

176
    The function is allowed to change the self.op attribute so that
177
    later methods no longer need to worry about missing parameters.
178

179
    """
180
    pass
181

    
182
  def ExpandNames(self):
183
    """Expand names for this LU.
184

185
    This method is called before starting to execute the opcode, and it should
186
    update all the parameters of the opcode to their canonical form (e.g. a
187
    short node name must be fully expanded after this method has successfully
188
    completed). This way locking, hooks, logging, etc. can work correctly.
189

190
    LUs which implement this method must also populate the self.needed_locks
191
    member, as a dict with lock levels as keys, and a list of needed lock names
192
    as values. Rules:
193

194
      - use an empty dict if you don't need any lock
195
      - if you don't need any lock at a particular level omit that level
196
      - don't put anything for the BGL level
197
      - if you want all locks at a level use locking.ALL_SET as a value
198

199
    If you need to share locks (rather than acquire them exclusively) at one
200
    level you can modify self.share_locks, setting a true value (usually 1) for
201
    that level. By default locks are not shared.
202

203
    This function can also define a list of tasklets, which then will be
204
    executed in order instead of the usual LU-level CheckPrereq and Exec
205
    functions, if those are not defined by the LU.
206

207
    Examples::
208

209
      # Acquire all nodes and one instance
210
      self.needed_locks = {
211
        locking.LEVEL_NODE: locking.ALL_SET,
212
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
213
      }
214
      # Acquire just two nodes
215
      self.needed_locks = {
216
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
217
      }
218
      # Acquire no locks
219
      self.needed_locks = {} # No, you can't leave it to the default value None
220

221
    """
222
    # The implementation of this method is mandatory only if the new LU is
223
    # concurrent, so that old LUs don't need to be changed all at the same
224
    # time.
225
    if self.REQ_BGL:
226
      self.needed_locks = {} # Exclusive LUs don't need locks.
227
    else:
228
      raise NotImplementedError
229

    
230
  def DeclareLocks(self, level):
231
    """Declare LU locking needs for a level
232

233
    While most LUs can just declare their locking needs at ExpandNames time,
234
    sometimes there's the need to calculate some locks after having acquired
235
    the ones before. This function is called just before acquiring locks at a
236
    particular level, but after acquiring the ones at lower levels, and permits
237
    such calculations. It can be used to modify self.needed_locks, and by
238
    default it does nothing.
239

240
    This function is only called if you have something already set in
241
    self.needed_locks for the level.
242

243
    @param level: Locking level which is going to be locked
244
    @type level: member of ganeti.locking.LEVELS
245

246
    """
247

    
248
  def CheckPrereq(self):
249
    """Check prerequisites for this LU.
250

251
    This method should check that the prerequisites for the execution
252
    of this LU are fulfilled. It can do internode communication, but
253
    it should be idempotent - no cluster or system changes are
254
    allowed.
255

256
    The method should raise errors.OpPrereqError in case something is
257
    not fulfilled. Its return value is ignored.
258

259
    This method should also update all the parameters of the opcode to
260
    their canonical form if it hasn't been done by ExpandNames before.
261

262
    """
263
    if self.tasklets is not None:
264
      for (idx, tl) in enumerate(self.tasklets):
265
        logging.debug("Checking prerequisites for tasklet %s/%s",
266
                      idx + 1, len(self.tasklets))
267
        tl.CheckPrereq()
268
    else:
269
      pass
270

    
271
  def Exec(self, feedback_fn):
272
    """Execute the LU.
273

274
    This method should implement the actual work. It should raise
275
    errors.OpExecError for failures that are somewhat dealt with in
276
    code, or expected.
277

278
    """
279
    if self.tasklets is not None:
280
      for (idx, tl) in enumerate(self.tasklets):
281
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
282
        tl.Exec(feedback_fn)
283
    else:
284
      raise NotImplementedError
285

    
286
  def BuildHooksEnv(self):
287
    """Build hooks environment for this LU.
288

289
    @rtype: dict
290
    @return: Dictionary containing the environment that will be used for
291
      running the hooks for this LU. The keys of the dict must not be prefixed
292
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
293
      will extend the environment with additional variables. If no environment
294
      should be defined, an empty dictionary should be returned (not C{None}).
295
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
296
      will not be called.
297

298
    """
299
    raise NotImplementedError
300

    
301
  def BuildHooksNodes(self):
302
    """Build list of nodes to run LU's hooks.
303

304
    @rtype: tuple; (list, list)
305
    @return: Tuple containing a list of node names on which the hook
306
      should run before the execution and a list of node names on which the
307
      hook should run after the execution. No nodes should be returned as an
308
      empty list (and not None).
309
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
310
      will not be called.
311

312
    """
313
    raise NotImplementedError
314

    
315
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
316
    """Notify the LU about the results of its hooks.
317

318
    This method is called every time a hooks phase is executed, and notifies
319
    the Logical Unit about the hooks' result. The LU can then use it to alter
320
    its result based on the hooks.  By default the method does nothing and the
321
    previous result is passed back unchanged, but any LU can define it if it
322
    wants to use the local cluster hook-scripts somehow.
323

324
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
325
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
326
    @param hook_results: the results of the multi-node hooks rpc call
327
    @param feedback_fn: function used to send feedback back to the caller
328
    @param lu_result: the previous Exec result this LU had, or None
329
        in the PRE phase
330
    @return: the new Exec result, based on the previous result
331
        and hook results
332

333
    """
334
    # API must be kept, thus we ignore the unused-argument and
335
    # could-be-a-function warnings
336
    # pylint: disable-msg=W0613,R0201
337
    return lu_result
338

    
339
  def _ExpandAndLockInstance(self):
340
    """Helper function to expand and lock an instance.
341

342
    Many LUs that work on an instance take its name in self.op.instance_name
343
    and need to expand it and then declare the expanded name for locking. This
344
    function does it, and then updates self.op.instance_name to the expanded
345
    name. It also initializes needed_locks as a dict, if this hasn't been done
346
    before.
347

348
    """
349
    if self.needed_locks is None:
350
      self.needed_locks = {}
351
    else:
352
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
353
        "_ExpandAndLockInstance called with instance-level locks set"
354
    self.op.instance_name = _ExpandInstanceName(self.cfg,
355
                                                self.op.instance_name)
356
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
357

    
358
  def _LockInstancesNodes(self, primary_only=False):
359
    """Helper function to declare instances' nodes for locking.
360

361
    This function should be called after locking one or more instances to lock
362
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
363
    with all primary or secondary nodes for instances already locked and
364
    present in self.needed_locks[locking.LEVEL_INSTANCE].
365

366
    It should be called from DeclareLocks, and for safety only works if
367
    self.recalculate_locks[locking.LEVEL_NODE] is set.
368

369
    In the future it may grow parameters to just lock some instance's nodes, or
370
    to just lock primaries or secondary nodes, if needed.
371

372
    If should be called in DeclareLocks in a way similar to::
373

374
      if level == locking.LEVEL_NODE:
375
        self._LockInstancesNodes()
376

377
    @type primary_only: boolean
378
    @param primary_only: only lock primary nodes of locked instances
379

380
    """
381
    assert locking.LEVEL_NODE in self.recalculate_locks, \
382
      "_LockInstancesNodes helper function called with no nodes to recalculate"
383

    
384
    # TODO: check if we've really been called with the instance locks held
385

    
386
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
387
    # future we might want to have different behaviors depending on the value
388
    # of self.recalculate_locks[locking.LEVEL_NODE]
389
    wanted_nodes = []
390
    for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
391
      instance = self.context.cfg.GetInstanceInfo(instance_name)
392
      wanted_nodes.append(instance.primary_node)
393
      if not primary_only:
394
        wanted_nodes.extend(instance.secondary_nodes)
395

    
396
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
397
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
398
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
399
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
400

    
401
    del self.recalculate_locks[locking.LEVEL_NODE]
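

# Illustrative sketch, not part of the original module: a minimal LU that
# follows the rules listed in the LogicalUnit docstring (ExpandNames,
# CheckPrereq, Exec, BuildHooksEnv, BuildHooksNodes, HPATH/HTYPE). It is not
# wired to any opcode; real LUs are dispatched via the opcode table in mcpu.
class _ExampleNoopLU(LogicalUnit):
  """Hypothetical no-op LU used only for illustration."""
  HPATH = "example-noop"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}                  # this LU needs no locks

  def CheckPrereq(self):
    pass                                    # nothing to verify

  def BuildHooksEnv(self):
    return {"OP_TARGET": self.cfg.GetClusterName()}

  def BuildHooksNodes(self):
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    feedback_fn("Doing nothing")
    return True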
402

    
403

    
404
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
405
  """Simple LU which runs no hooks.
406

407
  This LU is intended as a parent for other LogicalUnits which will
408
  run no hooks, in order to reduce duplicate code.
409

410
  """
411
  HPATH = None
412
  HTYPE = None
413

    
414
  def BuildHooksEnv(self):
415
    """Empty BuildHooksEnv for NoHooksLu.
416

417
    This just raises an error.
418

419
    """
420
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
421

    
422
  def BuildHooksNodes(self):
423
    """Empty BuildHooksNodes for NoHooksLU.
424

425
    """
426
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
427

    
428

    
429
class Tasklet:
430
  """Tasklet base class.
431

432
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
433
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
434
  tasklets know nothing about locks.
435

436
  Subclasses must follow these rules:
437
    - Implement CheckPrereq
438
    - Implement Exec
439

440
  """
441
  def __init__(self, lu):
442
    self.lu = lu
443

    
444
    # Shortcuts
445
    self.cfg = lu.cfg
446
    self.rpc = lu.rpc
447

    
448
  def CheckPrereq(self):
449
    """Check prerequisites for this tasklets.
450

451
    This method should check whether the prerequisites for the execution of
452
    this tasklet are fulfilled. It can do internode communication, but it
453
    should be idempotent - no cluster or system changes are allowed.
454

455
    The method should raise errors.OpPrereqError in case something is not
456
    fulfilled. Its return value is ignored.
457

458
    This method should also update all parameters to their canonical form if it
459
    hasn't been done before.
460

461
    """
462
    pass
463

    
464
  def Exec(self, feedback_fn):
465
    """Execute the tasklet.
466

467
    This method should implement the actual work. It should raise
468
    errors.OpExecError for failures that are somewhat dealt with in code, or
469
    expected.
470

471
    """
472
    raise NotImplementedError
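

# Illustrative sketch, not part of the original module: a trivial tasklet. An
# LU wanting to use it would set something like
#   self.tasklets = [_ExampleNoopTasklet(self)]
# in its ExpandNames; LogicalUnit.CheckPrereq and Exec then iterate over the
# tasklets instead of requiring LU-level implementations. All locking still
# happens in the owning LU.
class _ExampleNoopTasklet(Tasklet):
  """Hypothetical tasklet that only emits a log message."""
  def CheckPrereq(self):
    pass                                    # nothing to verify

  def Exec(self, feedback_fn):
    feedback_fn("Example tasklet executed")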
473

    
474

    
475
class _QueryBase:
476
  """Base for query utility classes.
477

478
  """
479
  #: Attribute holding field definitions
480
  FIELDS = None
481

    
482
  def __init__(self, filter_, fields, use_locking):
483
    """Initializes this class.
484

485
    """
486
    self.use_locking = use_locking
487

    
488
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
489
                             namefield="name")
490
    self.requested_data = self.query.RequestedData()
491
    self.names = self.query.RequestedNames()
492

    
493
    # Sort only if no names were requested
494
    self.sort_by_name = not self.names
495

    
496
    self.do_locking = None
497
    self.wanted = None
498

    
499
  def _GetNames(self, lu, all_names, lock_level):
500
    """Helper function to determine names asked for in the query.
501

502
    """
503
    if self.do_locking:
504
      names = lu.glm.list_owned(lock_level)
505
    else:
506
      names = all_names
507

    
508
    if self.wanted == locking.ALL_SET:
509
      assert not self.names
510
      # caller didn't specify names, so ordering is not important
511
      return utils.NiceSort(names)
512

    
513
    # caller specified names and we must keep the same order
514
    assert self.names
515
    assert not self.do_locking or lu.glm.is_owned(lock_level)
516

    
517
    missing = set(self.wanted).difference(names)
518
    if missing:
519
      raise errors.OpExecError("Some items were removed before retrieving"
520
                               " their data: %s" % missing)
521

    
522
    # Return expanded names
523
    return self.wanted
524

    
525
  def ExpandNames(self, lu):
526
    """Expand names for this query.
527

528
    See L{LogicalUnit.ExpandNames}.
529

530
    """
531
    raise NotImplementedError()
532

    
533
  def DeclareLocks(self, lu, level):
534
    """Declare locks for this query.
535

536
    See L{LogicalUnit.DeclareLocks}.
537

538
    """
539
    raise NotImplementedError()
540

    
541
  def _GetQueryData(self, lu):
542
    """Collects all data for this query.
543

544
    @return: Query data object
545

546
    """
547
    raise NotImplementedError()
548

    
549
  def NewStyleQuery(self, lu):
550
    """Collect data and execute query.
551

552
    """
553
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
554
                                  sort_by_name=self.sort_by_name)
555

    
556
  def OldStyleQuery(self, lu):
557
    """Collect data and execute query.
558

559
    """
560
    return self.query.OldStyleQuery(self._GetQueryData(lu),
561
                                    sort_by_name=self.sort_by_name)
562

    
563

    
564
def _GetWantedNodes(lu, nodes):
565
  """Returns list of checked and expanded node names.
566

567
  @type lu: L{LogicalUnit}
568
  @param lu: the logical unit on whose behalf we execute
569
  @type nodes: list
570
  @param nodes: list of node names or None for all nodes
571
  @rtype: list
572
  @return: the list of nodes, sorted
573
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
574

575
  """
576
  if nodes:
577
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
578

    
579
  return utils.NiceSort(lu.cfg.GetNodeList())
580

    
581

    
582
def _GetWantedInstances(lu, instances):
583
  """Returns list of checked and expanded instance names.
584

585
  @type lu: L{LogicalUnit}
586
  @param lu: the logical unit on whose behalf we execute
587
  @type instances: list
588
  @param instances: list of instance names or None for all instances
589
  @rtype: list
590
  @return: the list of instances, sorted
591
  @raise errors.OpPrereqError: if the instances parameter is wrong type
592
  @raise errors.OpPrereqError: if any of the passed instances is not found
593

594
  """
595
  if instances:
596
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
597
  else:
598
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
599
  return wanted
600

    
601

    
602
def _GetUpdatedParams(old_params, update_dict,
603
                      use_default=True, use_none=False):
604
  """Return the new version of a parameter dictionary.
605

606
  @type old_params: dict
607
  @param old_params: old parameters
608
  @type update_dict: dict
609
  @param update_dict: dict containing new parameter values, or
610
      constants.VALUE_DEFAULT to reset the parameter to its default
611
      value
612
  @type use_default: boolean
613
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
614
      values as 'to be deleted' values
615
  @type use_none: boolean
616
  @param use_none: whether to recognise C{None} values as 'to be
617
      deleted' values
618
  @rtype: dict
619
  @return: the new parameter dictionary
620

621
  """
622
  params_copy = copy.deepcopy(old_params)
623
  for key, val in update_dict.iteritems():
624
    if ((use_default and val == constants.VALUE_DEFAULT) or
625
        (use_none and val is None)):
626
      try:
627
        del params_copy[key]
628
      except KeyError:
629
        pass
630
    else:
631
      params_copy[key] = val
632
  return params_copy
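
  # Worked example (illustrative, not part of the original file):
  #
  #   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  #   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
  #   _GetUpdatedParams(old, upd)
  #   => {"root_path": "/dev/vda1", "serial_console": True}
  #
  # A VALUE_DEFAULT entry deletes the key so the cluster/group default applies
  # again, while ordinary values simply overwrite or extend the dictionary.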
633

    
634

    
635
def _ReleaseLocks(lu, level, names=None, keep=None):
636
  """Releases locks owned by an LU.
637

638
  @type lu: L{LogicalUnit}
639
  @param level: Lock level
640
  @type names: list or None
641
  @param names: Names of locks to release
642
  @type keep: list or None
643
  @param keep: Names of locks to retain
644

645
  """
646
  assert not (keep is not None and names is not None), \
647
         "Only one of the 'names' and the 'keep' parameters can be given"
648

    
649
  if names is not None:
650
    should_release = names.__contains__
651
  elif keep:
652
    should_release = lambda name: name not in keep
653
  else:
654
    should_release = None
655

    
656
  if should_release:
657
    retain = []
658
    release = []
659

    
660
    # Determine which locks to release
661
    for name in lu.glm.list_owned(level):
662
      if should_release(name):
663
        release.append(name)
664
      else:
665
        retain.append(name)
666

    
667
    assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
668

    
669
    # Release just some locks
670
    lu.glm.release(level, names=release)
671

    
672
    assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
673
  else:
674
    # Release everything
675
    lu.glm.release(level)
676

    
677
    assert not lu.glm.is_owned(level), "No locks should be owned"
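
  # Illustrative usage, not part of the original file: an LU that has settled
  # on one node can drop the node locks it acquired optimistically with
  #
  #   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.target_node])
  #
  # or name the locks to give up via "names=[...]"; passing both arguments is
  # rejected by the assertion at the top of this function. The "target_node"
  # attribute is only an example.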
678

    
679

    
680
def _RunPostHook(lu, node_name):
681
  """Runs the post-hook for an opcode on a single node.
682

683
  """
684
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
685
  try:
686
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
687
  except:
688
    # pylint: disable-msg=W0702
689
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
690

    
691

    
692
def _CheckOutputFields(static, dynamic, selected):
693
  """Checks whether all selected fields are valid.
694

695
  @type static: L{utils.FieldSet}
696
  @param static: static fields set
697
  @type dynamic: L{utils.FieldSet}
698
  @param dynamic: dynamic fields set
699

700
  """
701
  f = utils.FieldSet()
702
  f.Extend(static)
703
  f.Extend(dynamic)
704

    
705
  delta = f.NonMatching(selected)
706
  if delta:
707
    raise errors.OpPrereqError("Unknown output fields selected: %s"
708
                               % ",".join(delta), errors.ECODE_INVAL)
709

    
710

    
711
def _CheckGlobalHvParams(params):
712
  """Validates that given hypervisor params are not global ones.
713

714
  This will ensure that instances don't get customised versions of
715
  global params.
716

717
  """
718
  used_globals = constants.HVC_GLOBALS.intersection(params)
719
  if used_globals:
720
    msg = ("The following hypervisor parameters are global and cannot"
721
           " be customized at instance level, please modify them at"
722
           " cluster level: %s" % utils.CommaJoin(used_globals))
723
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
724

    
725

    
726
def _CheckNodeOnline(lu, node, msg=None):
727
  """Ensure that a given node is online.
728

729
  @param lu: the LU on behalf of which we make the check
730
  @param node: the node to check
731
  @param msg: if passed, should be a message to replace the default one
732
  @raise errors.OpPrereqError: if the node is offline
733

734
  """
735
  if msg is None:
736
    msg = "Can't use offline node"
737
  if lu.cfg.GetNodeInfo(node).offline:
738
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
739

    
740

    
741
def _CheckNodeNotDrained(lu, node):
742
  """Ensure that a given node is not drained.
743

744
  @param lu: the LU on behalf of which we make the check
745
  @param node: the node to check
746
  @raise errors.OpPrereqError: if the node is drained
747

748
  """
749
  if lu.cfg.GetNodeInfo(node).drained:
750
    raise errors.OpPrereqError("Can't use drained node %s" % node,
751
                               errors.ECODE_STATE)
752

    
753

    
754
def _CheckNodeVmCapable(lu, node):
755
  """Ensure that a given node is vm capable.
756

757
  @param lu: the LU on behalf of which we make the check
758
  @param node: the node to check
759
  @raise errors.OpPrereqError: if the node is not vm capable
760

761
  """
762
  if not lu.cfg.GetNodeInfo(node).vm_capable:
763
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
764
                               errors.ECODE_STATE)
765

    
766

    
767
def _CheckNodeHasOS(lu, node, os_name, force_variant):
768
  """Ensure that a node supports a given OS.
769

770
  @param lu: the LU on behalf of which we make the check
771
  @param node: the node to check
772
  @param os_name: the OS to query about
773
  @param force_variant: whether to ignore variant errors
774
  @raise errors.OpPrereqError: if the node is not supporting the OS
775

776
  """
777
  result = lu.rpc.call_os_get(node, os_name)
778
  result.Raise("OS '%s' not in supported OS list for node %s" %
779
               (os_name, node),
780
               prereq=True, ecode=errors.ECODE_INVAL)
781
  if not force_variant:
782
    _CheckOSVariant(result.payload, os_name)
783

    
784

    
785
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
786
  """Ensure that a node has the given secondary ip.
787

788
  @type lu: L{LogicalUnit}
789
  @param lu: the LU on behalf of which we make the check
790
  @type node: string
791
  @param node: the node to check
792
  @type secondary_ip: string
793
  @param secondary_ip: the ip to check
794
  @type prereq: boolean
795
  @param prereq: whether to throw a prerequisite or an execute error
796
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
797
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
798

799
  """
800
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
801
  result.Raise("Failure checking secondary ip on node %s" % node,
802
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
803
  if not result.payload:
804
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
805
           " please fix and re-run this command" % secondary_ip)
806
    if prereq:
807
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
808
    else:
809
      raise errors.OpExecError(msg)
810

    
811

    
812
def _GetClusterDomainSecret():
813
  """Reads the cluster domain secret.
814

815
  """
816
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
817
                               strict=True)
818

    
819

    
820
def _CheckInstanceDown(lu, instance, reason):
821
  """Ensure that an instance is not running."""
822
  if instance.admin_up:
823
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
824
                               (instance.name, reason), errors.ECODE_STATE)
825

    
826
  pnode = instance.primary_node
827
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
828
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
829
              prereq=True, ecode=errors.ECODE_ENVIRON)
830

    
831
  if instance.name in ins_l.payload:
832
    raise errors.OpPrereqError("Instance %s is running, %s" %
833
                               (instance.name, reason), errors.ECODE_STATE)
834

    
835

    
836
def _ExpandItemName(fn, name, kind):
837
  """Expand an item name.
838

839
  @param fn: the function to use for expansion
840
  @param name: requested item name
841
  @param kind: text description ('Node' or 'Instance')
842
  @return: the resolved (full) name
843
  @raise errors.OpPrereqError: if the item is not found
844

845
  """
846
  full_name = fn(name)
847
  if full_name is None:
848
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
849
                               errors.ECODE_NOENT)
850
  return full_name
851

    
852

    
853
def _ExpandNodeName(cfg, name):
854
  """Wrapper over L{_ExpandItemName} for nodes."""
855
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
856

    
857

    
858
def _ExpandInstanceName(cfg, name):
859
  """Wrapper over L{_ExpandItemName} for instance."""
860
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
861

    
862

    
863
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
864
                          memory, vcpus, nics, disk_template, disks,
865
                          bep, hvp, hypervisor_name, tags):
866
  """Builds instance related env variables for hooks
867

868
  This builds the hook environment from individual variables.
869

870
  @type name: string
871
  @param name: the name of the instance
872
  @type primary_node: string
873
  @param primary_node: the name of the instance's primary node
874
  @type secondary_nodes: list
875
  @param secondary_nodes: list of secondary nodes as strings
876
  @type os_type: string
877
  @param os_type: the name of the instance's OS
878
  @type status: boolean
879
  @param status: the should_run status of the instance
880
  @type memory: string
881
  @param memory: the memory size of the instance
882
  @type vcpus: string
883
  @param vcpus: the count of VCPUs the instance has
884
  @type nics: list
885
  @param nics: list of tuples (ip, mac, mode, link) representing
886
      the NICs the instance has
887
  @type disk_template: string
888
  @param disk_template: the disk template of the instance
889
  @type disks: list
890
  @param disks: the list of (size, mode) pairs
891
  @type bep: dict
892
  @param bep: the backend parameters for the instance
893
  @type hvp: dict
894
  @param hvp: the hypervisor parameters for the instance
895
  @type hypervisor_name: string
896
  @param hypervisor_name: the hypervisor for the instance
897
  @type tags: list
898
  @param tags: list of instance tags as strings
899
  @rtype: dict
900
  @return: the hook environment for this instance
901

902
  """
903
  if status:
904
    str_status = "up"
905
  else:
906
    str_status = "down"
907
  env = {
908
    "OP_TARGET": name,
909
    "INSTANCE_NAME": name,
910
    "INSTANCE_PRIMARY": primary_node,
911
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
912
    "INSTANCE_OS_TYPE": os_type,
913
    "INSTANCE_STATUS": str_status,
914
    "INSTANCE_MEMORY": memory,
915
    "INSTANCE_VCPUS": vcpus,
916
    "INSTANCE_DISK_TEMPLATE": disk_template,
917
    "INSTANCE_HYPERVISOR": hypervisor_name,
918
  }
919

    
920
  if nics:
921
    nic_count = len(nics)
922
    for idx, (ip, mac, mode, link) in enumerate(nics):
923
      if ip is None:
924
        ip = ""
925
      env["INSTANCE_NIC%d_IP" % idx] = ip
926
      env["INSTANCE_NIC%d_MAC" % idx] = mac
927
      env["INSTANCE_NIC%d_MODE" % idx] = mode
928
      env["INSTANCE_NIC%d_LINK" % idx] = link
929
      if mode == constants.NIC_MODE_BRIDGED:
930
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
931
  else:
932
    nic_count = 0
933

    
934
  env["INSTANCE_NIC_COUNT"] = nic_count
935

    
936
  if disks:
937
    disk_count = len(disks)
938
    for idx, (size, mode) in enumerate(disks):
939
      env["INSTANCE_DISK%d_SIZE" % idx] = size
940
      env["INSTANCE_DISK%d_MODE" % idx] = mode
941
  else:
942
    disk_count = 0
943

    
944
  env["INSTANCE_DISK_COUNT"] = disk_count
945

    
946
  if not tags:
947
    tags = []
948

    
949
  env["INSTANCE_TAGS"] = " ".join(tags)
950

    
951
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
952
    for key, value in source.items():
953
      env["INSTANCE_%s_%s" % (kind, key)] = value
954

    
955
  return env
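
  # Illustrative result, not part of the original file: for a small instance
  # with one NIC and one disk the returned dictionary looks roughly like
  #
  #   {"OP_TARGET": "inst1.example.com",
  #    "INSTANCE_NAME": "inst1.example.com",
  #    "INSTANCE_PRIMARY": "node1.example.com",
  #    "INSTANCE_SECONDARIES": "",
  #    "INSTANCE_STATUS": "up",
  #    "INSTANCE_NIC_COUNT": 1,
  #    "INSTANCE_NIC0_MAC": "aa:00:00:12:34:56",
  #    "INSTANCE_DISK_COUNT": 1,
  #    "INSTANCE_DISK0_SIZE": 10240,
  #    "INSTANCE_BE_memory": 128,
  #    ...}
  #
  # (names and values are made up); the hooks runner later prefixes every key
  # with "GANETI_".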
956

    
957

    
958
def _NICListToTuple(lu, nics):
959
  """Build a list of nic information tuples.
960

961
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
962
  value in LUInstanceQueryData.
963

964
  @type lu:  L{LogicalUnit}
965
  @param lu: the logical unit on whose behalf we execute
966
  @type nics: list of L{objects.NIC}
967
  @param nics: list of nics to convert to hooks tuples
968

969
  """
970
  hooks_nics = []
971
  cluster = lu.cfg.GetClusterInfo()
972
  for nic in nics:
973
    ip = nic.ip
974
    mac = nic.mac
975
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
976
    mode = filled_params[constants.NIC_MODE]
977
    link = filled_params[constants.NIC_LINK]
978
    hooks_nics.append((ip, mac, mode, link))
979
  return hooks_nics
980

    
981

    
982
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
983
  """Builds instance related env variables for hooks from an object.
984

985
  @type lu: L{LogicalUnit}
986
  @param lu: the logical unit on whose behalf we execute
987
  @type instance: L{objects.Instance}
988
  @param instance: the instance for which we should build the
989
      environment
990
  @type override: dict
991
  @param override: dictionary with key/values that will override
992
      our values
993
  @rtype: dict
994
  @return: the hook environment dictionary
995

996
  """
997
  cluster = lu.cfg.GetClusterInfo()
998
  bep = cluster.FillBE(instance)
999
  hvp = cluster.FillHV(instance)
1000
  args = {
1001
    'name': instance.name,
1002
    'primary_node': instance.primary_node,
1003
    'secondary_nodes': instance.secondary_nodes,
1004
    'os_type': instance.os,
1005
    'status': instance.admin_up,
1006
    'memory': bep[constants.BE_MEMORY],
1007
    'vcpus': bep[constants.BE_VCPUS],
1008
    'nics': _NICListToTuple(lu, instance.nics),
1009
    'disk_template': instance.disk_template,
1010
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
1011
    'bep': bep,
1012
    'hvp': hvp,
1013
    'hypervisor_name': instance.hypervisor,
1014
    'tags': instance.tags,
1015
  }
1016
  if override:
1017
    args.update(override)
1018
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1019

    
1020

    
1021
def _AdjustCandidatePool(lu, exceptions):
1022
  """Adjust the candidate pool after node operations.
1023

1024
  """
1025
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1026
  if mod_list:
1027
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1028
               utils.CommaJoin(node.name for node in mod_list))
1029
    for name in mod_list:
1030
      lu.context.ReaddNode(name)
1031
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1032
  if mc_now > mc_max:
1033
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1034
               (mc_now, mc_max))
1035

    
1036

    
1037
def _DecideSelfPromotion(lu, exceptions=None):
1038
  """Decide whether I should promote myself as a master candidate.
1039

1040
  """
1041
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1042
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1043
  # the new node will increase mc_max by one, so:
1044
  mc_should = min(mc_should + 1, cp_size)
1045
  return mc_now < mc_should
1046

    
1047

    
1048
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1049
  """Check that the brigdes needed by a list of nics exist.
1050

1051
  """
1052
  cluster = lu.cfg.GetClusterInfo()
1053
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1054
  brlist = [params[constants.NIC_LINK] for params in paramslist
1055
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1056
  if brlist:
1057
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1058
    result.Raise("Error checking bridges on destination node '%s'" %
1059
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1060

    
1061

    
1062
def _CheckInstanceBridgesExist(lu, instance, node=None):
1063
  """Check that the brigdes needed by an instance exist.
1064

1065
  """
1066
  if node is None:
1067
    node = instance.primary_node
1068
  _CheckNicsBridgesExist(lu, instance.nics, node)
1069

    
1070

    
1071
def _CheckOSVariant(os_obj, name):
1072
  """Check whether an OS name conforms to the os variants specification.
1073

1074
  @type os_obj: L{objects.OS}
1075
  @param os_obj: OS object to check
1076
  @type name: string
1077
  @param name: OS name passed by the user, to check for validity
1078

1079
  """
1080
  if not os_obj.supported_variants:
1081
    return
1082
  variant = objects.OS.GetVariant(name)
1083
  if not variant:
1084
    raise errors.OpPrereqError("OS name must include a variant",
1085
                               errors.ECODE_INVAL)
1086

    
1087
  if variant not in os_obj.supported_variants:
1088
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1089

    
1090

    
1091
def _GetNodeInstancesInner(cfg, fn):
1092
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1093

    
1094

    
1095
def _GetNodeInstances(cfg, node_name):
1096
  """Returns a list of all primary and secondary instances on a node.
1097

1098
  """
1099

    
1100
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1101

    
1102

    
1103
def _GetNodePrimaryInstances(cfg, node_name):
1104
  """Returns primary instances on a node.
1105

1106
  """
1107
  return _GetNodeInstancesInner(cfg,
1108
                                lambda inst: node_name == inst.primary_node)
1109

    
1110

    
1111
def _GetNodeSecondaryInstances(cfg, node_name):
1112
  """Returns secondary instances on a node.
1113

1114
  """
1115
  return _GetNodeInstancesInner(cfg,
1116
                                lambda inst: node_name in inst.secondary_nodes)
1117

    
1118

    
1119
def _GetStorageTypeArgs(cfg, storage_type):
1120
  """Returns the arguments for a storage type.
1121

1122
  """
1123
  # Special case for file storage
1124
  if storage_type == constants.ST_FILE:
1125
    # storage.FileStorage wants a list of storage directories
1126
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1127

    
1128
  return []
1129

    
1130

    
1131
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1132
  faulty = []
1133

    
1134
  for dev in instance.disks:
1135
    cfg.SetDiskID(dev, node_name)
1136

    
1137
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1138
  result.Raise("Failed to get disk status from node %s" % node_name,
1139
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1140

    
1141
  for idx, bdev_status in enumerate(result.payload):
1142
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1143
      faulty.append(idx)
1144

    
1145
  return faulty
1146

    
1147

    
1148
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1149
  """Check the sanity of iallocator and node arguments and use the
1150
  cluster-wide iallocator if appropriate.
1151

1152
  Check that at most one of (iallocator, node) is specified. If none is
1153
  specified, then the LU's opcode's iallocator slot is filled with the
1154
  cluster-wide default iallocator.
1155

1156
  @type iallocator_slot: string
1157
  @param iallocator_slot: the name of the opcode iallocator slot
1158
  @type node_slot: string
1159
  @param node_slot: the name of the opcode target node slot
1160

1161
  """
1162
  node = getattr(lu.op, node_slot, None)
1163
  iallocator = getattr(lu.op, iallocator_slot, None)
1164

    
1165
  if node is not None and iallocator is not None:
1166
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1167
                               errors.ECODE_INVAL)
1168
  elif node is None and iallocator is None:
1169
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1170
    if default_iallocator:
1171
      setattr(lu.op, iallocator_slot, default_iallocator)
1172
    else:
1173
      raise errors.OpPrereqError("No iallocator or node given and no"
1174
                                 " cluster-wide default iallocator found;"
1175
                                 " please specify either an iallocator or a"
1176
                                 " node, or set a cluster-wide default"
1177
                                 " iallocator")
1178

    
1179

    
1180
class LUClusterPostInit(LogicalUnit):
1181
  """Logical unit for running hooks after cluster initialization.
1182

1183
  """
1184
  HPATH = "cluster-init"
1185
  HTYPE = constants.HTYPE_CLUSTER
1186

    
1187
  def BuildHooksEnv(self):
1188
    """Build hooks env.
1189

1190
    """
1191
    return {
1192
      "OP_TARGET": self.cfg.GetClusterName(),
1193
      }
1194

    
1195
  def BuildHooksNodes(self):
1196
    """Build hooks nodes.
1197

1198
    """
1199
    return ([], [self.cfg.GetMasterNode()])
1200

    
1201
  def Exec(self, feedback_fn):
1202
    """Nothing to do.
1203

1204
    """
1205
    return True
1206

    
1207

    
1208
class LUClusterDestroy(LogicalUnit):
1209
  """Logical unit for destroying the cluster.
1210

1211
  """
1212
  HPATH = "cluster-destroy"
1213
  HTYPE = constants.HTYPE_CLUSTER
1214

    
1215
  def BuildHooksEnv(self):
1216
    """Build hooks env.
1217

1218
    """
1219
    return {
1220
      "OP_TARGET": self.cfg.GetClusterName(),
1221
      }
1222

    
1223
  def BuildHooksNodes(self):
1224
    """Build hooks nodes.
1225

1226
    """
1227
    return ([], [])
1228

    
1229
  def CheckPrereq(self):
1230
    """Check prerequisites.
1231

1232
    This checks whether the cluster is empty.
1233

1234
    Any errors are signaled by raising errors.OpPrereqError.
1235

1236
    """
1237
    master = self.cfg.GetMasterNode()
1238

    
1239
    nodelist = self.cfg.GetNodeList()
1240
    if len(nodelist) != 1 or nodelist[0] != master:
1241
      raise errors.OpPrereqError("There are still %d node(s) in"
1242
                                 " this cluster." % (len(nodelist) - 1),
1243
                                 errors.ECODE_INVAL)
1244
    instancelist = self.cfg.GetInstanceList()
1245
    if instancelist:
1246
      raise errors.OpPrereqError("There are still %d instance(s) in"
1247
                                 " this cluster." % len(instancelist),
1248
                                 errors.ECODE_INVAL)
1249

    
1250
  def Exec(self, feedback_fn):
1251
    """Destroys the cluster.
1252

1253
    """
1254
    master = self.cfg.GetMasterNode()
1255

    
1256
    # Run post hooks on master node before it's removed
1257
    _RunPostHook(self, master)
1258

    
1259
    result = self.rpc.call_node_stop_master(master, False)
1260
    result.Raise("Could not disable the master role")
1261

    
1262
    return master
1263

    
1264

    
1265
def _VerifyCertificate(filename):
1266
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1267

1268
  @type filename: string
1269
  @param filename: Path to PEM file
1270

1271
  """
1272
  try:
1273
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1274
                                           utils.ReadFile(filename))
1275
  except Exception, err: # pylint: disable-msg=W0703
1276
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1277
            "Failed to load X509 certificate %s: %s" % (filename, err))
1278

    
1279
  (errcode, msg) = \
1280
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1281
                                constants.SSL_CERT_EXPIRATION_ERROR)
1282

    
1283
  if msg:
1284
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1285
  else:
1286
    fnamemsg = None
1287

    
1288
  if errcode is None:
1289
    return (None, fnamemsg)
1290
  elif errcode == utils.CERT_WARNING:
1291
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1292
  elif errcode == utils.CERT_ERROR:
1293
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1294

    
1295
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1296

    
1297

    
1298
def _GetAllHypervisorParameters(cluster, instances):
1299
  """Compute the set of all hypervisor parameters.
1300

1301
  @type cluster: L{objects.Cluster}
1302
  @param cluster: the cluster object
1303
  @param instances: list of L{objects.Instance}
1304
  @param instances: additional instances from which to obtain parameters
1305
  @rtype: list of (origin, hypervisor, parameters)
1306
  @return: a list with all parameters found, indicating the hypervisor they
1307
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1308

1309
  """
1310
  hvp_data = []
1311

    
1312
  for hv_name in cluster.enabled_hypervisors:
1313
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1314

    
1315
  for os_name, os_hvp in cluster.os_hvp.items():
1316
    for hv_name, hv_params in os_hvp.items():
1317
      if hv_params:
1318
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1319
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1320

    
1321
  # TODO: collapse identical parameter values in a single one
1322
  for instance in instances:
1323
    if instance.hvparams:
1324
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1325
                       cluster.FillHV(instance)))
1326

    
1327
  return hvp_data
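
  # Illustrative return value, not part of the original file (parameter dicts
  # abbreviated, names made up):
  #
  #   [("cluster", "xen-pvm", {...cluster-level defaults...}),
  #    ("os debian-squeeze", "xen-pvm", {...defaults merged with os_hvp...}),
  #    ("instance inst1.example.com", "xen-pvm", {...fully filled params...})]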
1328

    
1329

    
1330
class _VerifyErrors(object):
1331
  """Mix-in for cluster/group verify LUs.
1332

1333
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1334
  self.op and self._feedback_fn to be available.)
1335

1336
  """
1337
  TCLUSTER = "cluster"
1338
  TNODE = "node"
1339
  TINSTANCE = "instance"
1340

    
1341
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1342
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1343
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1344
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1345
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1346
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1347
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1348
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1349
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1350
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1351
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1352
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1353
  ENODEDRBD = (TNODE, "ENODEDRBD")
1354
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1355
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1356
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1357
  ENODEHV = (TNODE, "ENODEHV")
1358
  ENODELVM = (TNODE, "ENODELVM")
1359
  ENODEN1 = (TNODE, "ENODEN1")
1360
  ENODENET = (TNODE, "ENODENET")
1361
  ENODEOS = (TNODE, "ENODEOS")
1362
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1363
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1364
  ENODERPC = (TNODE, "ENODERPC")
1365
  ENODESSH = (TNODE, "ENODESSH")
1366
  ENODEVERSION = (TNODE, "ENODEVERSION")
1367
  ENODESETUP = (TNODE, "ENODESETUP")
1368
  ENODETIME = (TNODE, "ENODETIME")
1369
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1370

    
1371
  ETYPE_FIELD = "code"
1372
  ETYPE_ERROR = "ERROR"
1373
  ETYPE_WARNING = "WARNING"
1374

    
1375
  def _Error(self, ecode, item, msg, *args, **kwargs):
1376
    """Format an error message.
1377

1378
    Based on the opcode's error_codes parameter, either format a
1379
    parseable error code, or a simpler error string.
1380

1381
    This must be called only from Exec and functions called from Exec.
1382

1383
    """
1384
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1385
    itype, etxt = ecode
1386
    # first complete the msg
1387
    if args:
1388
      msg = msg % args
1389
    # then format the whole message
1390
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1391
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1392
    else:
1393
      if item:
1394
        item = " " + item
1395
      else:
1396
        item = ""
1397
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1398
    # and finally report it via the feedback_fn
1399
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1400

    
1401
  def _ErrorIf(self, cond, *args, **kwargs):
1402
    """Log an error message if the passed condition is True.
1403

1404
    """
1405
    cond = (bool(cond)
1406
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1407
    if cond:
1408
      self._Error(*args, **kwargs)
1409
    # do not mark the operation as failed for WARN cases only
1410
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1411
      self.bad = self.bad or cond
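
  # Illustrative output, not part of the original file: a call such as
  #
  #   self._ErrorIf(True, self.ENODEHV, "node1",
  #                 "hypervisor verify failure: %s", "xend unreachable")
  #
  # is reported through feedback_fn as
  #
  #   - ERROR: node node1: hypervisor verify failure: xend unreachable
  #
  # or, when the opcode's error_codes flag is set, in the parseable form
  #
  #   - ERROR:ENODEHV:node:node1:hypervisor verify failure: xend unreachable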
1412

    
1413

    
1414
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1415
  """Verifies the cluster config.
1416

1417
  """
1418
  REQ_BGL = True
1419

    
1420
  def _VerifyHVP(self, hvp_data):
1421
    """Verifies locally the syntax of the hypervisor parameters.
1422

1423
    """
1424
    for item, hv_name, hv_params in hvp_data:
1425
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1426
             (hv_name, item))
1427
      try:
1428
        hv_class = hypervisor.GetHypervisor(hv_name)
1429
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1430
        hv_class.CheckParameterSyntax(hv_params)
1431
      except errors.GenericError, err:
1432
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1433

    
1434
  def ExpandNames(self):
1435
    # Information can be safely retrieved as the BGL is acquired in exclusive
1436
    # mode
1437
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1438
    self.all_node_info = self.cfg.GetAllNodesInfo()
1439
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1440
    self.needed_locks = {}
1441

    
1442
  def Exec(self, feedback_fn):
1443
    """Verify integrity of cluster, performing various test on nodes.
1444

1445
    """
1446
    self.bad = False
1447
    self._feedback_fn = feedback_fn
1448

    
1449
    feedback_fn("* Verifying cluster config")
1450

    
1451
    for msg in self.cfg.VerifyConfig():
1452
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1453

    
1454
    feedback_fn("* Verifying cluster certificate files")
1455

    
1456
    for cert_filename in constants.ALL_CERT_FILES:
1457
      (errcode, msg) = _VerifyCertificate(cert_filename)
1458
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1459

    
1460
    feedback_fn("* Verifying hypervisor parameters")
1461

    
1462
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1463
                                                self.all_inst_info.values()))
1464

    
1465
    feedback_fn("* Verifying all nodes belong to an existing group")
1466

    
1467
    # We do this verification here because, should this bogus circumstance
1468
    # occur, it would never be caught by VerifyGroup, which only acts on
1469
    # nodes/instances reachable from existing node groups.
1470

    
1471
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1472
                         if node.group not in self.all_group_info)
1473

    
1474
    dangling_instances = {}
1475
    no_node_instances = []
1476

    
1477
    for inst in self.all_inst_info.values():
1478
      if inst.primary_node in dangling_nodes:
1479
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1480
      elif inst.primary_node not in self.all_node_info:
1481
        no_node_instances.append(inst.name)
1482

    
1483
    pretty_dangling = [
1484
        "%s (%s)" %
1485
        (node.name,
1486
         utils.CommaJoin(dangling_instances.get(node.name,
1487
                                                ["no instances"])))
1488
        for node in dangling_nodes]
1489

    
1490
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1491
                  "the following nodes (and their instances) belong to a non"
1492
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1493

    
1494
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1495
                  "the following instances have a non-existing primary-node:"
1496
                  " %s", utils.CommaJoin(no_node_instances))
1497

    
1498
    return (not self.bad, [g.name for g in self.all_group_info.values()])
1499

    
1500

    
1501
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1502
  """Verifies the status of a node group.
1503

1504
  """
1505
  HPATH = "cluster-verify"
1506
  HTYPE = constants.HTYPE_CLUSTER
1507
  REQ_BGL = False
1508

    
1509
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1510

    
1511
  class NodeImage(object):
1512
    """A class representing the logical and physical status of a node.
1513

1514
    @type name: string
1515
    @ivar name: the node name to which this object refers
1516
    @ivar volumes: a structure as returned from
1517
        L{ganeti.backend.GetVolumeList} (runtime)
1518
    @ivar instances: a list of running instances (runtime)
1519
    @ivar pinst: list of configured primary instances (config)
1520
    @ivar sinst: list of configured secondary instances (config)
1521
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1522
        instances for which this node is secondary (config)
1523
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1524
    @ivar dfree: free disk, as reported by the node (runtime)
1525
    @ivar offline: the offline status (config)
1526
    @type rpc_fail: boolean
1527
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1528
        not whether the individual keys were correct) (runtime)
1529
    @type lvm_fail: boolean
1530
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1531
    @type hyp_fail: boolean
1532
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1533
    @type ghost: boolean
1534
    @ivar ghost: whether this is a known node or not (config)
1535
    @type os_fail: boolean
1536
    @ivar os_fail: whether the RPC call didn't return valid OS data
1537
    @type oslist: list
1538
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1539
    @type vm_capable: boolean
1540
    @ivar vm_capable: whether the node can host instances
1541

1542
    """
1543
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

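    # Flag a skew only if the node's reported time falls outside the
    # [start, end] window of the verify RPC, widened by the allowed clock skew.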
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

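    # diskstatus maps node name -> list of (success, status) tuples, one per
    # disk (as assembled by _CollectDiskInfo); flatten it for per-disk checks.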
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
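      # sbp maps each primary node to the instances that use this node as
      # secondary; sum the memory needed to fail all of them over to here.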
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

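    # fileinfo maps each filename to a dict of checksum -> set of node names
    # that reported that checksum.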
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
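    # drbd_map[node] maps each DRBD minor number on this node to the instance
    # name that owns it according to the configuration.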
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

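    # os_dict maps each OS name to a list of (path, status, diagnose message,
    # variants, parameters, api_versions) tuples as reported by the node.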
    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # this will be caught in the backend too
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
               and not f_var, self.ENODEOS, node,
               "OS %s with API at least %d does not declare any variant",
               os_name, constants.OS_API_V15)
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

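    # instdisk: instance name -> {node name: [(success, payload), ...]},
    # assembled below from the per-node RPC results.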
    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

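    # Parameters for the node verify RPC; each key enables one class of
    # checks on the remote nodes.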
    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

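    # Fill in the expected per-node instance lists (pinst/sinst/sbp) from the
    # configuration, creating placeholder node images (possibly marked as
    # ghosts) for nodes we have no data for.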
    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
2690
    for instance in self.my_inst_names:
2691
      if verbose:
2692
        feedback_fn("* Verifying instance %s" % instance)
2693
      inst_config = self.my_inst_info[instance]
2694
      self._VerifyInstance(instance, inst_config, node_image,
2695
                           instdisk[instance])
2696
      inst_nodes_offline = []
2697

    
2698
      pnode = inst_config.primary_node
2699
      pnode_img = node_image[pnode]
2700
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2701
               self.ENODERPC, pnode, "instance %s, connection to"
2702
               " primary node failed", instance)
2703

    
2704
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2705
               self.EINSTANCEBADNODE, instance,
2706
               "instance is marked as running and lives on offline node %s",
2707
               inst_config.primary_node)
2708

    
2709
      # If the instance is non-redundant we cannot survive losing its primary
2710
      # node, so we are not N+1 compliant. On the other hand we have no disk
2711
      # templates with more than one secondary so that situation is not well
2712
      # supported either.
2713
      # FIXME: does not support file-backed instances
2714
      if not inst_config.secondary_nodes:
2715
        i_non_redundant.append(instance)
2716

    
2717
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2718
               instance, "instance has multiple secondary nodes: %s",
2719
               utils.CommaJoin(inst_config.secondary_nodes),
2720
               code=self.ETYPE_WARNING)
2721

    
2722
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2723
        pnode = inst_config.primary_node
2724
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2725
        instance_groups = {}
2726

    
2727
        for node in instance_nodes:
2728
          instance_groups.setdefault(self.all_node_info[node].group,
2729
                                     []).append(node)
2730

    
2731
        pretty_list = [
2732
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2733
          # Sort so that we always list the primary node first.
2734
          for group, nodes in sorted(instance_groups.items(),
2735
                                     key=lambda (_, nodes): pnode in nodes,
2736
                                     reverse=True)]
2737

    
2738
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2739
                      instance, "instance has primary and secondary nodes in"
2740
                      " different groups: %s", utils.CommaJoin(pretty_list),
2741
                      code=self.ETYPE_WARNING)
2742

    
2743
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2744
        i_non_a_balanced.append(instance)
2745

    
2746
      for snode in inst_config.secondary_nodes:
2747
        s_img = node_image[snode]
2748
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2749
                 "instance %s, connection to secondary node failed", instance)
2750

    
2751
        if s_img.offline:
2752
          inst_nodes_offline.append(snode)
2753

    
2754
      # warn that the instance lives on offline nodes
2755
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2756
               "instance has offline secondary node(s) %s",
2757
               utils.CommaJoin(inst_nodes_offline))
2758
      # ... or ghost/non-vm_capable nodes
2759
      for node in inst_config.all_nodes:
2760
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2761
                 "instance lives on ghost node %s", node)
2762
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2763
                 instance, "instance lives on non-vm_capable node %s", node)
2764

    
2765
    feedback_fn("* Verifying orphan volumes")
2766
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2767

    
2768
    # We will get spurious "unknown volume" warnings if any node of this group
2769
    # is secondary for an instance whose primary is in another group. To avoid
2770
    # them, we find these instances and add their volumes to node_vol_should.
2771
    for inst in self.all_inst_info.values():
2772
      for secondary in inst.secondary_nodes:
2773
        if (secondary in self.my_node_info
2774
            and inst.name not in self.my_inst_info):
2775
          inst.MapLVsByNode(node_vol_should)
2776
          break
2777

    
2778
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2779

    
2780
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2781
      feedback_fn("* Verifying N+1 Memory redundancy")
2782
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2783

    
2784
    feedback_fn("* Other Notes")
2785
    if i_non_redundant:
2786
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2787
                  % len(i_non_redundant))
2788

    
2789
    if i_non_a_balanced:
2790
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2791
                  % len(i_non_a_balanced))
2792

    
2793
    if n_offline:
2794
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2795

    
2796
    if n_drained:
2797
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2798

    
2799
    return not self.bad
2800

    
2801
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2802
    """Analyze the post-hooks' result
2803

2804
    This method analyses the hook result, handles it, and sends some
2805
    nicely-formatted feedback back to the user.
2806

2807
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2808
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2809
    @param hooks_results: the results of the multi-node hooks rpc call
2810
    @param feedback_fn: function used send feedback back to the caller
2811
    @param lu_result: previous Exec result
2812
    @return: the new Exec result, based on the previous result
2813
        and hook results
2814

2815
    """
2816
    # We only really run POST phase hooks, only for non-empty groups,
2817
    # and are only interested in their results
2818
    if not self.my_node_names:
2819
      # empty node group
2820
      pass
2821
    elif phase == constants.HOOKS_PHASE_POST:
2822
      # Used to change hooks' output to proper indentation
2823
      feedback_fn("* Hooks Results")
2824
      assert hooks_results, "invalid result from hooks"
2825

    
2826
      for node_name in hooks_results:
2827
        res = hooks_results[node_name]
2828
        msg = res.fail_msg
2829
        test = msg and not res.offline
2830
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2831
                      "Communication failure in hooks execution: %s", msg)
2832
        if res.offline or msg:
2833
          # No need to investigate payload if node is offline or gave an error.
2834
          # override manually lu_result here as _ErrorIf only
2835
          # overrides self.bad
2836
          lu_result = 1
2837
          continue
2838
        for script, hkr, output in res.payload:
2839
          test = hkr == constants.HKR_FAIL
2840
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2841
                        "Script %s failed, output:", script)
2842
          if test:
2843
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2844
            feedback_fn("%s" % output)
2845
            lu_result = 0
2846

    
2847
    return lu_result
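  # Illustrative note on the re-indentation above (a sketch, not part of the
  # verification logic): assuming _HOOKS_INDENT_RE is a multi-line
  # "start of line" pattern such as re.compile("^", re.M), the substitution
  # prefixes every line of the hook output with six spaces, e.g.
  #
  #   >>> import re
  #   >>> re.compile("^", re.M).sub("      ", "err: foo\nerr: bar")
  #   '      err: foo\n      err: bar'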
2848

    
2849

    
2850
class LUClusterVerifyDisks(NoHooksLU):
2851
  """Verifies the cluster disks status.
2852

2853
  """
2854
  REQ_BGL = False
2855

    
2856
  def ExpandNames(self):
2857
    self.needed_locks = {
2858
      locking.LEVEL_NODE: locking.ALL_SET,
2859
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2860
    }
2861
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2862

    
2863
  def Exec(self, feedback_fn):
2864
    """Verify integrity of cluster disks.
2865

2866
    @rtype: tuple of three items
2867
    @return: a tuple of (dict of node-to-node_error, list of instances
2868
        which need activate-disks, dict of instance: (node, volume) for
2869
        missing volumes)
2870

2871
    """
2872
    result = res_nodes, res_instances, res_missing = {}, [], {}
2873

    
2874
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2875
    instances = self.cfg.GetAllInstancesInfo().values()
2876

    
2877
    nv_dict = {}
2878
    for inst in instances:
2879
      inst_lvs = {}
2880
      if not inst.admin_up:
2881
        continue
2882
      inst.MapLVsByNode(inst_lvs)
2883
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2884
      for node, vol_list in inst_lvs.iteritems():
2885
        for vol in vol_list:
2886
          nv_dict[(node, vol)] = inst
2887

    
2888
    if not nv_dict:
2889
      return result
2890

    
2891
    node_lvs = self.rpc.call_lv_list(nodes, [])
2892
    for node, node_res in node_lvs.items():
2893
      if node_res.offline:
2894
        continue
2895
      msg = node_res.fail_msg
2896
      if msg:
2897
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2898
        res_nodes[node] = msg
2899
        continue
2900

    
2901
      lvs = node_res.payload
2902
      for lv_name, (_, _, lv_online) in lvs.items():
2903
        inst = nv_dict.pop((node, lv_name), None)
2904
        if (not lv_online and inst is not None
2905
            and inst.name not in res_instances):
2906
          res_instances.append(inst.name)
2907

    
2908
    # any leftover items in nv_dict are missing LVs, let's arrange the
2909
    # data better
2910
    for key, inst in nv_dict.iteritems():
2911
      if inst.name not in res_missing:
2912
        res_missing[inst.name] = []
2913
      res_missing[inst.name].append(key)
2914

    
2915
    return result
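  # Data-shape sketch for the Exec above (toy values, no RPC involved): the
  # per-instance LV map is inverted into nv_dict so that node reports can be
  # matched by (node, volume) key:
  #
  #   inst_lvs = {"node1.example.com": ["xenvg/disk0", "xenvg/disk0_meta"]}
  #   nv_dict = {}
  #   for node, vols in inst_lvs.items():
  #     for vol in vols:
  #       nv_dict[(node, vol)] = "instance1.example.com"
  #
  #   # every LV reported by the nodes is popped from nv_dict; whatever is
  #   # left over is recorded in res_missing, keyed by instance name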
2916

    
2917

    
2918
class LUClusterRepairDiskSizes(NoHooksLU):
2919
  """Verifies the cluster disks sizes.
2920

2921
  """
2922
  REQ_BGL = False
2923

    
2924
  def ExpandNames(self):
2925
    if self.op.instances:
2926
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
2927
      self.needed_locks = {
2928
        locking.LEVEL_NODE: [],
2929
        locking.LEVEL_INSTANCE: self.wanted_names,
2930
        }
2931
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2932
    else:
2933
      self.wanted_names = None
2934
      self.needed_locks = {
2935
        locking.LEVEL_NODE: locking.ALL_SET,
2936
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2937
        }
2938
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2939

    
2940
  def DeclareLocks(self, level):
2941
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2942
      self._LockInstancesNodes(primary_only=True)
2943

    
2944
  def CheckPrereq(self):
2945
    """Check prerequisites.
2946

2947
    This only checks the optional instance list against the existing names.
2948

2949
    """
2950
    if self.wanted_names is None:
2951
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2952

    
2953
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2954
                             in self.wanted_names]
2955

    
2956
  def _EnsureChildSizes(self, disk):
2957
    """Ensure children of the disk have the needed disk size.
2958

2959
    This is valid mainly for DRBD8 and fixes an issue where the
2960
    children have smaller disk size.
2961

2962
    @param disk: an L{ganeti.objects.Disk} object
2963

2964
    """
2965
    if disk.dev_type == constants.LD_DRBD8:
2966
      assert disk.children, "Empty children for DRBD8?"
2967
      fchild = disk.children[0]
2968
      mismatch = fchild.size < disk.size
2969
      if mismatch:
2970
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2971
                     fchild.size, disk.size)
2972
        fchild.size = disk.size
2973

    
2974
      # and we recurse on this child only, not on the metadev
2975
      return self._EnsureChildSizes(fchild) or mismatch
2976
    else:
2977
      return False
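  # Shape sketch for _EnsureChildSizes (illustrative only, toy sizes): a DRBD8
  # disk has a data child and a metadata child; only the data child
  # (children[0]) is resized and recursed into, e.g.
  #
  #   drbd8 (size=10240 MiB)
  #     +- data LV (size=10200 MiB)  -> grown to 10240, recursion continues
  #     +- meta LV (size=128 MiB)    -> intentionally left alone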
2978

    
2979
  def Exec(self, feedback_fn):
2980
    """Verify the size of cluster disks.
2981

2982
    """
2983
    # TODO: check child disks too
2984
    # TODO: check differences in size between primary/secondary nodes
2985
    per_node_disks = {}
2986
    for instance in self.wanted_instances:
2987
      pnode = instance.primary_node
2988
      if pnode not in per_node_disks:
2989
        per_node_disks[pnode] = []
2990
      for idx, disk in enumerate(instance.disks):
2991
        per_node_disks[pnode].append((instance, idx, disk))
2992

    
2993
    changed = []
2994
    for node, dskl in per_node_disks.items():
2995
      newl = [v[2].Copy() for v in dskl]
2996
      for dsk in newl:
2997
        self.cfg.SetDiskID(dsk, node)
2998
      result = self.rpc.call_blockdev_getsize(node, newl)
2999
      if result.fail_msg:
3000
        self.LogWarning("Failure in blockdev_getsize call to node"
3001
                        " %s, ignoring", node)
3002
        continue
3003
      if len(result.payload) != len(dskl):
3004
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3005
                        " result.payload=%s", node, len(dskl), result.payload)
3006
        self.LogWarning("Invalid result from node %s, ignoring node results",
3007
                        node)
3008
        continue
3009
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3010
        if size is None:
3011
          self.LogWarning("Disk %d of instance %s did not return size"
3012
                          " information, ignoring", idx, instance.name)
3013
          continue
3014
        if not isinstance(size, (int, long)):
3015
          self.LogWarning("Disk %d of instance %s did not return valid"
3016
                          " size information, ignoring", idx, instance.name)
3017
          continue
3018
        size = size >> 20
3019
        if size != disk.size:
3020
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3021
                       " correcting: recorded %d, actual %d", idx,
3022
                       instance.name, disk.size, size)
3023
          disk.size = size
3024
          self.cfg.Update(instance, feedback_fn)
3025
          changed.append((instance.name, idx, size))
3026
        if self._EnsureChildSizes(disk):
3027
          self.cfg.Update(instance, feedback_fn)
3028
          changed.append((instance.name, idx, disk.size))
3029
    return changed
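  # Unit-handling sketch (assumption: blockdev_getsize reports sizes in
  # bytes, while disk.size is kept in MiB, hence the ">> 20" above):
  #
  #   >>> 10737418240 >> 20   # 10 GiB expressed in bytes
  #   10240                   # MiB, the value stored in disk.size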
3030

    
3031

    
3032
class LUClusterRename(LogicalUnit):
3033
  """Rename the cluster.
3034

3035
  """
3036
  HPATH = "cluster-rename"
3037
  HTYPE = constants.HTYPE_CLUSTER
3038

    
3039
  def BuildHooksEnv(self):
3040
    """Build hooks env.
3041

3042
    """
3043
    return {
3044
      "OP_TARGET": self.cfg.GetClusterName(),
3045
      "NEW_NAME": self.op.name,
3046
      }
3047

    
3048
  def BuildHooksNodes(self):
3049
    """Build hooks nodes.
3050

3051
    """
3052
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3053

    
3054
  def CheckPrereq(self):
3055
    """Verify that the passed name is a valid one.
3056

3057
    """
3058
    hostname = netutils.GetHostname(name=self.op.name,
3059
                                    family=self.cfg.GetPrimaryIPFamily())
3060

    
3061
    new_name = hostname.name
3062
    self.ip = new_ip = hostname.ip
3063
    old_name = self.cfg.GetClusterName()
3064
    old_ip = self.cfg.GetMasterIP()
3065
    if new_name == old_name and new_ip == old_ip:
3066
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3067
                                 " cluster has changed",
3068
                                 errors.ECODE_INVAL)
3069
    if new_ip != old_ip:
3070
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3071
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3072
                                   " reachable on the network" %
3073
                                   new_ip, errors.ECODE_NOTUNIQUE)
3074

    
3075
    self.op.name = new_name
3076

    
3077
  def Exec(self, feedback_fn):
3078
    """Rename the cluster.
3079

3080
    """
3081
    clustername = self.op.name
3082
    ip = self.ip
3083

    
3084
    # shutdown the master IP
3085
    master = self.cfg.GetMasterNode()
3086
    result = self.rpc.call_node_stop_master(master, False)
3087
    result.Raise("Could not disable the master role")
3088

    
3089
    try:
3090
      cluster = self.cfg.GetClusterInfo()
3091
      cluster.cluster_name = clustername
3092
      cluster.master_ip = ip
3093
      self.cfg.Update(cluster, feedback_fn)
3094

    
3095
      # update the known hosts file
3096
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3097
      node_list = self.cfg.GetOnlineNodeList()
3098
      try:
3099
        node_list.remove(master)
3100
      except ValueError:
3101
        pass
3102
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3103
    finally:
3104
      result = self.rpc.call_node_start_master(master, False, False)
3105
      msg = result.fail_msg
3106
      if msg:
3107
        self.LogWarning("Could not re-enable the master role on"
3108
                        " the master, please restart manually: %s", msg)
3109

    
3110
    return clustername
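  # Usage note (informational): this LU backs "gnt-cluster rename"; the flow
  # above is: stop the master IP, update cluster_name/master_ip in the
  # configuration, push the regenerated known_hosts file to the other online
  # nodes, and finally try to restart the master role (in the finally: block,
  # so even if an earlier step failed).  For example:
  #
  #   gnt-cluster rename cluster2.example.com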
3111

    
3112

    
3113
class LUClusterSetParams(LogicalUnit):
3114
  """Change the parameters of the cluster.
3115

3116
  """
3117
  HPATH = "cluster-modify"
3118
  HTYPE = constants.HTYPE_CLUSTER
3119
  REQ_BGL = False
3120

    
3121
  def CheckArguments(self):
3122
    """Check parameters
3123

3124
    """
3125
    if self.op.uid_pool:
3126
      uidpool.CheckUidPool(self.op.uid_pool)
3127

    
3128
    if self.op.add_uids:
3129
      uidpool.CheckUidPool(self.op.add_uids)
3130

    
3131
    if self.op.remove_uids:
3132
      uidpool.CheckUidPool(self.op.remove_uids)
3133

    
3134
  def ExpandNames(self):
3135
    # FIXME: in the future maybe other cluster params won't require checking on
3136
    # all nodes to be modified.
3137
    self.needed_locks = {
3138
      locking.LEVEL_NODE: locking.ALL_SET,
3139
    }
3140
    self.share_locks[locking.LEVEL_NODE] = 1
3141

    
3142
  def BuildHooksEnv(self):
3143
    """Build hooks env.
3144

3145
    """
3146
    return {
3147
      "OP_TARGET": self.cfg.GetClusterName(),
3148
      "NEW_VG_NAME": self.op.vg_name,
3149
      }
3150

    
3151
  def BuildHooksNodes(self):
3152
    """Build hooks nodes.
3153

3154
    """
3155
    mn = self.cfg.GetMasterNode()
3156
    return ([mn], [mn])
3157

    
3158
  def CheckPrereq(self):
3159
    """Check prerequisites.
3160

3161
    This checks whether the given params don't conflict and
3162
    if the given volume group is valid.
3163

3164
    """
3165
    if self.op.vg_name is not None and not self.op.vg_name:
3166
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3167
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3168
                                   " instances exist", errors.ECODE_INVAL)
3169

    
3170
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3171
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3172
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3173
                                   " drbd-based instances exist",
3174
                                   errors.ECODE_INVAL)
3175

    
3176
    node_list = self.glm.list_owned(locking.LEVEL_NODE)
3177

    
3178
    # if vg_name not None, checks given volume group on all nodes
3179
    if self.op.vg_name:
3180
      vglist = self.rpc.call_vg_list(node_list)
3181
      for node in node_list:
3182
        msg = vglist[node].fail_msg
3183
        if msg:
3184
          # ignoring down node
3185
          self.LogWarning("Error while gathering data on node %s"
3186
                          " (ignoring node): %s", node, msg)
3187
          continue
3188
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3189
                                              self.op.vg_name,
3190
                                              constants.MIN_VG_SIZE)
3191
        if vgstatus:
3192
          raise errors.OpPrereqError("Error on node '%s': %s" %
3193
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3194

    
3195
    if self.op.drbd_helper:
3196
      # checks given drbd helper on all nodes
3197
      helpers = self.rpc.call_drbd_helper(node_list)
3198
      for node in node_list:
3199
        ninfo = self.cfg.GetNodeInfo(node)
3200
        if ninfo.offline:
3201
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3202
          continue
3203
        msg = helpers[node].fail_msg
3204
        if msg:
3205
          raise errors.OpPrereqError("Error checking drbd helper on node"
3206
                                     " '%s': %s" % (node, msg),
3207
                                     errors.ECODE_ENVIRON)
3208
        node_helper = helpers[node].payload
3209
        if node_helper != self.op.drbd_helper:
3210
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3211
                                     (node, node_helper), errors.ECODE_ENVIRON)
3212

    
3213
    self.cluster = cluster = self.cfg.GetClusterInfo()
3214
    # validate params changes
3215
    if self.op.beparams:
3216
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3217
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3218

    
3219
    if self.op.ndparams:
3220
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3221
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3222

    
3223
      # TODO: we need a more general way to handle resetting
3224
      # cluster-level parameters to default values
3225
      if self.new_ndparams["oob_program"] == "":
3226
        self.new_ndparams["oob_program"] = \
3227
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3228

    
3229
    if self.op.nicparams:
3230
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3231
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3232
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3233
      nic_errors = []
3234

    
3235
      # check all instances for consistency
3236
      for instance in self.cfg.GetAllInstancesInfo().values():
3237
        for nic_idx, nic in enumerate(instance.nics):
3238
          params_copy = copy.deepcopy(nic.nicparams)
3239
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3240

    
3241
          # check parameter syntax
3242
          try:
3243
            objects.NIC.CheckParameterSyntax(params_filled)
3244
          except errors.ConfigurationError, err:
3245
            nic_errors.append("Instance %s, nic/%d: %s" %
3246
                              (instance.name, nic_idx, err))
3247

    
3248
          # if we're moving instances to routed, check that they have an ip
3249
          target_mode = params_filled[constants.NIC_MODE]
3250
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3251
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3252
                              " address" % (instance.name, nic_idx))
3253
      if nic_errors:
3254
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3255
                                   "\n".join(nic_errors))
3256

    
3257
    # hypervisor list/parameters
3258
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3259
    if self.op.hvparams:
3260
      for hv_name, hv_dict in self.op.hvparams.items():
3261
        if hv_name not in self.new_hvparams:
3262
          self.new_hvparams[hv_name] = hv_dict
3263
        else:
3264
          self.new_hvparams[hv_name].update(hv_dict)
3265

    
3266
    # os hypervisor parameters
3267
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3268
    if self.op.os_hvp:
3269
      for os_name, hvs in self.op.os_hvp.items():
3270
        if os_name not in self.new_os_hvp:
3271
          self.new_os_hvp[os_name] = hvs
3272
        else:
3273
          for hv_name, hv_dict in hvs.items():
3274
            if hv_name not in self.new_os_hvp[os_name]:
3275
              self.new_os_hvp[os_name][hv_name] = hv_dict
3276
            else:
3277
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3278

    
3279
    # os parameters
3280
    self.new_osp = objects.FillDict(cluster.osparams, {})
3281
    if self.op.osparams:
3282
      for os_name, osp in self.op.osparams.items():
3283
        if os_name not in self.new_osp:
3284
          self.new_osp[os_name] = {}
3285

    
3286
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3287
                                                  use_none=True)
3288

    
3289
        if not self.new_osp[os_name]:
3290
          # we removed all parameters
3291
          del self.new_osp[os_name]
3292
        else:
3293
          # check the parameter validity (remote check)
3294
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3295
                         os_name, self.new_osp[os_name])
3296

    
3297
    # changes to the hypervisor list
3298
    if self.op.enabled_hypervisors is not None:
3299
      self.hv_list = self.op.enabled_hypervisors
3300
      for hv in self.hv_list:
3301
        # if the hypervisor doesn't already exist in the cluster
3302
        # hvparams, we initialize it to empty, and then (in both
3303
        # cases) we make sure to fill the defaults, as we might not
3304
        # have a complete defaults list if the hypervisor wasn't
3305
        # enabled before
3306
        if hv not in new_hvp:
3307
          new_hvp[hv] = {}
3308
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3309
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3310
    else:
3311
      self.hv_list = cluster.enabled_hypervisors
3312

    
3313
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3314
      # either the enabled list has changed, or the parameters have, validate
3315
      for hv_name, hv_params in self.new_hvparams.items():
3316
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3317
            (self.op.enabled_hypervisors and
3318
             hv_name in self.op.enabled_hypervisors)):
3319
          # either this is a new hypervisor, or its parameters have changed
3320
          hv_class = hypervisor.GetHypervisor(hv_name)
3321
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3322
          hv_class.CheckParameterSyntax(hv_params)
3323
          _CheckHVParams(self, node_list, hv_name, hv_params)
3324

    
3325
    if self.op.os_hvp:
3326
      # no need to check any newly-enabled hypervisors, since the
3327
      # defaults have already been checked in the above code-block
3328
      for os_name, os_hvp in self.new_os_hvp.items():
3329
        for hv_name, hv_params in os_hvp.items():
3330
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3331
          # we need to fill in the new os_hvp on top of the actual hv_p
3332
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3333
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3334
          hv_class = hypervisor.GetHypervisor(hv_name)
3335
          hv_class.CheckParameterSyntax(new_osp)
3336
          _CheckHVParams(self, node_list, hv_name, new_osp)
3337

    
3338
    if self.op.default_iallocator:
3339
      alloc_script = utils.FindFile(self.op.default_iallocator,
3340
                                    constants.IALLOCATOR_SEARCH_PATH,
3341
                                    os.path.isfile)
3342
      if alloc_script is None:
3343
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3344
                                   " specified" % self.op.default_iallocator,
3345
                                   errors.ECODE_INVAL)
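    # Layering sketch for the FillDict calls above (toy values):
    # objects.FillDict(defaults, overrides) returns a copy of "defaults"
    # updated with "overrides", so cluster-level hvparams act as the base and
    # per-OS overrides win on conflict, e.g.
    #
    #   cluster_hv = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
    #   os_hv      = {"root_path": "/dev/xvda1"}
    #   objects.FillDict(cluster_hv, os_hv)
    #   -> {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}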
3346

    
3347
  def Exec(self, feedback_fn):
3348
    """Change the parameters of the cluster.
3349

3350
    """
3351
    if self.op.vg_name is not None:
3352
      new_volume = self.op.vg_name
3353
      if not new_volume:
3354
        new_volume = None
3355
      if new_volume != self.cfg.GetVGName():
3356
        self.cfg.SetVGName(new_volume)
3357
      else:
3358
        feedback_fn("Cluster LVM configuration already in desired"
3359
                    " state, not changing")
3360
    if self.op.drbd_helper is not None:
3361
      new_helper = self.op.drbd_helper
3362
      if not new_helper:
3363
        new_helper = None
3364
      if new_helper != self.cfg.GetDRBDHelper():
3365
        self.cfg.SetDRBDHelper(new_helper)
3366
      else:
3367
        feedback_fn("Cluster DRBD helper already in desired state,"
3368
                    " not changing")
3369
    if self.op.hvparams:
3370
      self.cluster.hvparams = self.new_hvparams
3371
    if self.op.os_hvp:
3372
      self.cluster.os_hvp = self.new_os_hvp
3373
    if self.op.enabled_hypervisors is not None:
3374
      self.cluster.hvparams = self.new_hvparams
3375
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3376
    if self.op.beparams:
3377
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3378
    if self.op.nicparams:
3379
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3380
    if self.op.osparams:
3381
      self.cluster.osparams = self.new_osp
3382
    if self.op.ndparams:
3383
      self.cluster.ndparams = self.new_ndparams
3384

    
3385
    if self.op.candidate_pool_size is not None:
3386
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3387
      # we need to update the pool size here, otherwise the save will fail
3388
      _AdjustCandidatePool(self, [])
3389

    
3390
    if self.op.maintain_node_health is not None:
3391
      self.cluster.maintain_node_health = self.op.maintain_node_health
3392

    
3393
    if self.op.prealloc_wipe_disks is not None:
3394
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3395

    
3396
    if self.op.add_uids is not None:
3397
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3398

    
3399
    if self.op.remove_uids is not None:
3400
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3401

    
3402
    if self.op.uid_pool is not None:
3403
      self.cluster.uid_pool = self.op.uid_pool
3404

    
3405
    if self.op.default_iallocator is not None:
3406
      self.cluster.default_iallocator = self.op.default_iallocator
3407

    
3408
    if self.op.reserved_lvs is not None:
3409
      self.cluster.reserved_lvs = self.op.reserved_lvs
3410

    
3411
    def helper_os(aname, mods, desc):
3412
      desc += " OS list"
3413
      lst = getattr(self.cluster, aname)
3414
      for key, val in mods:
3415
        if key == constants.DDM_ADD:
3416
          if val in lst:
3417
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3418
          else:
3419
            lst.append(val)
3420
        elif key == constants.DDM_REMOVE:
3421
          if val in lst:
3422
            lst.remove(val)
3423
          else:
3424
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3425
        else:
3426
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3427

    
3428
    if self.op.hidden_os:
3429
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3430

    
3431
    if self.op.blacklisted_os:
3432
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3433

    
3434
    if self.op.master_netdev:
3435
      master = self.cfg.GetMasterNode()
3436
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3437
                  self.cluster.master_netdev)
3438
      result = self.rpc.call_node_stop_master(master, False)
3439
      result.Raise("Could not disable the master ip")
3440
      feedback_fn("Changing master_netdev from %s to %s" %
3441
                  (self.cluster.master_netdev, self.op.master_netdev))
3442
      self.cluster.master_netdev = self.op.master_netdev
3443

    
3444
    self.cfg.Update(self.cluster, feedback_fn)
3445

    
3446
    if self.op.master_netdev:
3447
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3448
                  self.op.master_netdev)
3449
      result = self.rpc.call_node_start_master(master, False, False)
3450
      if result.fail_msg:
3451
        self.LogWarning("Could not re-enable the master ip on"
3452
                        " the master, please restart manually: %s",
3453
                        result.fail_msg)
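  # Usage note (informational; exact flags depend on the installed tools):
  # this LU backs "gnt-cluster modify", e.g.
  #
  #   gnt-cluster modify --enabled-hypervisors=kvm,xen-pvm
  #   gnt-cluster modify -B auto_balance=False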
3454

    
3455

    
3456
def _UploadHelper(lu, nodes, fname):
3457
  """Helper for uploading a file and showing warnings.
3458

3459
  """
3460
  if os.path.exists(fname):
3461
    result = lu.rpc.call_upload_file(nodes, fname)
3462
    for to_node, to_result in result.items():
3463
      msg = to_result.fail_msg
3464
      if msg:
3465
        msg = ("Copy of file %s to node %s failed: %s" %
3466
               (fname, to_node, msg))
3467
        lu.proc.LogWarning(msg)
3468

    
3469

    
3470
def _ComputeAncillaryFiles(cluster, redist):
3471
  """Compute files external to Ganeti which need to be consistent.
3472

3473
  @type redist: boolean
3474
  @param redist: Whether to include files which need to be redistributed
3475

3476
  """
3477
  # Compute files for all nodes
3478
  files_all = set([
3479
    constants.SSH_KNOWN_HOSTS_FILE,
3480
    constants.CONFD_HMAC_KEY,
3481
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3482
    ])
3483

    
3484
  if not redist:
3485
    files_all.update(constants.ALL_CERT_FILES)
3486
    files_all.update(ssconf.SimpleStore().GetFileList())
3487

    
3488
  if cluster.modify_etc_hosts:
3489
    files_all.add(constants.ETC_HOSTS)
3490

    
3491
  # Files which must either exist on all nodes or on none
3492
  files_all_opt = set([
3493
    constants.RAPI_USERS_FILE,
3494
    ])
3495

    
3496
  # Files which should only be on master candidates
3497
  files_mc = set()
3498
  if not redist:
3499
    files_mc.add(constants.CLUSTER_CONF_FILE)
3500

    
3501
  # Files which should only be on VM-capable nodes
3502
  files_vm = set(filename
3503
    for hv_name in cluster.enabled_hypervisors
3504
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3505

    
3506
  # Filenames must be unique
3507
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3508
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3509
         "Found file listed in more than one file list"
3510

    
3511
  return (files_all, files_all_opt, files_mc, files_vm)
3512

    
3513

    
3514
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3515
  """Distribute additional files which are part of the cluster configuration.
3516

3517
  ConfigWriter takes care of distributing the config and ssconf files, but
3518
  there are more files which should be distributed to all nodes. This function
3519
  makes sure those are copied.
3520

3521
  @param lu: calling logical unit
3522
  @param additional_nodes: list of nodes not in the config to distribute to
3523
  @type additional_vm: boolean
3524
  @param additional_vm: whether the additional nodes are vm-capable or not
3525

3526
  """
3527
  # Gather target nodes
3528
  cluster = lu.cfg.GetClusterInfo()
3529
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3530

    
3531
  online_nodes = lu.cfg.GetOnlineNodeList()
3532
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3533

    
3534
  if additional_nodes is not None:
3535
    online_nodes.extend(additional_nodes)
3536
    if additional_vm:
3537
      vm_nodes.extend(additional_nodes)
3538

    
3539
  # Never distribute to master node
3540
  for nodelist in [online_nodes, vm_nodes]:
3541
    if master_info.name in nodelist:
3542
      nodelist.remove(master_info.name)
3543

    
3544
  # Gather file lists
3545
  (files_all, files_all_opt, files_mc, files_vm) = \
3546
    _ComputeAncillaryFiles(cluster, True)
3547

    
3548
  # Never re-distribute configuration file from here
3549
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3550
              constants.CLUSTER_CONF_FILE in files_vm)
3551
  assert not files_mc, "Master candidates not handled in this function"
3552

    
3553
  filemap = [
3554
    (online_nodes, files_all),
3555
    (online_nodes, files_all_opt),
3556
    (vm_nodes, files_vm),
3557
    ]
3558

    
3559
  # Upload the files
3560
  for (node_list, files) in filemap:
3561
    for fname in files:
3562
      _UploadHelper(lu, node_list, fname)
3563

    
3564

    
3565
class LUClusterRedistConf(NoHooksLU):
3566
  """Force the redistribution of cluster configuration.
3567

3568
  This is a very simple LU.
3569

3570
  """
3571
  REQ_BGL = False
3572

    
3573
  def ExpandNames(self):
3574
    self.needed_locks = {
3575
      locking.LEVEL_NODE: locking.ALL_SET,
3576
    }
3577
    self.share_locks[locking.LEVEL_NODE] = 1
3578

    
3579
  def Exec(self, feedback_fn):
3580
    """Redistribute the configuration.
3581

3582
    """
3583
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3584
    _RedistributeAncillaryFiles(self)
3585

    
3586

    
3587
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3588
  """Sleep and poll for an instance's disk to sync.
3589

3590
  """
3591
  if not instance.disks or disks is not None and not disks:
3592
    return True
3593

    
3594
  disks = _ExpandCheckDisks(instance, disks)
3595

    
3596
  if not oneshot:
3597
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3598

    
3599
  node = instance.primary_node
3600

    
3601
  for dev in disks:
3602
    lu.cfg.SetDiskID(dev, node)
3603

    
3604
  # TODO: Convert to utils.Retry
3605

    
3606
  retries = 0
3607
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3608
  while True:
3609
    max_time = 0
3610
    done = True
3611
    cumul_degraded = False
3612
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3613
    msg = rstats.fail_msg
3614
    if msg:
3615
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3616
      retries += 1
3617
      if retries >= 10:
3618
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3619
                                 " aborting." % node)
3620
      time.sleep(6)
3621
      continue
3622
    rstats = rstats.payload
3623
    retries = 0
3624
    for i, mstat in enumerate(rstats):
3625
      if mstat is None:
3626
        lu.LogWarning("Can't compute data for node %s/%s",
3627
                      node, disks[i].iv_name)
3628
        continue
3629

    
3630
      cumul_degraded = (cumul_degraded or
3631
                        (mstat.is_degraded and mstat.sync_percent is None))
3632
      if mstat.sync_percent is not None:
3633
        done = False
3634
        if mstat.estimated_time is not None:
3635
          rem_time = ("%s remaining (estimated)" %
3636
                      utils.FormatSeconds(mstat.estimated_time))
3637
          max_time = mstat.estimated_time
3638
        else:
3639
          rem_time = "no time estimate"
3640
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3641
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3642

    
3643
    # if we're done but degraded, let's do a few small retries, to
3644
    # make sure we see a stable and not transient situation; therefore
3645
    # we force restart of the loop
3646
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3647
      logging.info("Degraded disks found, %d retries left", degr_retries)
3648
      degr_retries -= 1
3649
      time.sleep(1)
3650
      continue
3651

    
3652
    if done or oneshot:
3653
      break
3654

    
3655
    time.sleep(min(60, max_time))
3656

    
3657
  if done:
3658
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3659
  return not cumul_degraded
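# Polling-pattern note for _WaitForSync (descriptive sketch only): the loop
# above tolerates up to ten consecutive RPC failures (6 s apart), sleeps
# min(60, estimated_time) seconds between successful polls, and when the
# disks report "done but degraded" it re-checks up to ten more times at one
# second intervals before trusting the result.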
3660

    
3661

    
3662
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3663
  """Check that mirrors are not degraded.
3664

3665
  The ldisk parameter, if True, will change the test from the
3666
  is_degraded attribute (which represents overall non-ok status for
3667
  the device(s)) to the ldisk (representing the local storage status).
3668

3669
  """
3670
  lu.cfg.SetDiskID(dev, node)
3671

    
3672
  result = True
3673

    
3674
  if on_primary or dev.AssembleOnSecondary():
3675
    rstats = lu.rpc.call_blockdev_find(node, dev)
3676
    msg = rstats.fail_msg
3677
    if msg:
3678
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3679
      result = False
3680
    elif not rstats.payload:
3681
      lu.LogWarning("Can't find disk on node %s", node)
3682
      result = False
3683
    else:
3684
      if ldisk:
3685
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3686
      else:
3687
        result = result and not rstats.payload.is_degraded
3688

    
3689
  if dev.children:
3690
    for child in dev.children:
3691
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3692

    
3693
  return result
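# Semantics note for _CheckDiskConsistency (descriptive only): with
# ldisk=False the function answers "is the mirror healthy overall?"
# (not rstats.payload.is_degraded); with ldisk=True it answers "is the local
# storage healthy?" (ldisk_status == constants.LDS_OKAY).  Child devices are
# always re-checked with the default, mirror-wide test.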
3694

    
3695

    
3696
class LUOobCommand(NoHooksLU):
3697
  """Logical unit for OOB handling.
3698

3699
  """
3700
  REQ_BGL = False
3701
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3702

    
3703
  def ExpandNames(self):
3704
    """Gather locks we need.
3705

3706
    """
3707
    if self.op.node_names:
3708
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3709
      lock_names = self.op.node_names
3710
    else:
3711
      lock_names = locking.ALL_SET
3712

    
3713
    self.needed_locks = {
3714
      locking.LEVEL_NODE: lock_names,
3715
      }
3716

    
3717
  def CheckPrereq(self):
3718
    """Check prerequisites.
3719

3720
    This checks:
3721
     - the node exists in the configuration
3722
     - OOB is supported
3723

3724
    Any errors are signaled by raising errors.OpPrereqError.
3725

3726
    """
3727
    self.nodes = []
3728
    self.master_node = self.cfg.GetMasterNode()
3729

    
3730
    assert self.op.power_delay >= 0.0
3731

    
3732
    if self.op.node_names:
3733
      if (self.op.command in self._SKIP_MASTER and
3734
          self.master_node in self.op.node_names):
3735
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3736
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3737

    
3738
        if master_oob_handler:
3739
          additional_text = ("run '%s %s %s' if you want to operate on the"
3740
                             " master regardless") % (master_oob_handler,
3741
                                                      self.op.command,
3742
                                                      self.master_node)
3743
        else:
3744
          additional_text = "it does not support out-of-band operations"
3745

    
3746
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3747
                                    " allowed for %s; %s") %
3748
                                   (self.master_node, self.op.command,
3749
                                    additional_text), errors.ECODE_INVAL)
3750
    else:
3751
      self.op.node_names = self.cfg.GetNodeList()
3752
      if self.op.command in self._SKIP_MASTER:
3753
        self.op.node_names.remove(self.master_node)
3754

    
3755
    if self.op.command in self._SKIP_MASTER:
3756
      assert self.master_node not in self.op.node_names
3757

    
3758
    for node_name in self.op.node_names:
3759
      node = self.cfg.GetNodeInfo(node_name)
3760

    
3761
      if node is None:
3762
        raise errors.OpPrereqError("Node %s not found" % node_name,
3763
                                   errors.ECODE_NOENT)
3764
      else:
3765
        self.nodes.append(node)
3766

    
3767
      if (not self.op.ignore_status and
3768
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3769
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3770
                                    " not marked offline") % node_name,
3771
                                   errors.ECODE_STATE)
3772

    
3773
  def Exec(self, feedback_fn):
3774
    """Execute OOB and return result if we expect any.
3775

3776
    """
3777
    master_node = self.master_node
3778
    ret = []
3779

    
3780
    for idx, node in enumerate(utils.NiceSort(self.nodes,
3781
                                              key=lambda node: node.name)):
3782
      node_entry = [(constants.RS_NORMAL, node.name)]
3783
      ret.append(node_entry)
3784

    
3785
      oob_program = _SupportsOob(self.cfg, node)
3786

    
3787
      if not oob_program:
3788
        node_entry.append((constants.RS_UNAVAIL, None))
3789
        continue
3790

    
3791
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3792
                   self.op.command, oob_program, node.name)
3793
      result = self.rpc.call_run_oob(master_node, oob_program,
3794
                                     self.op.command, node.name,
3795
                                     self.op.timeout)
3796

    
3797
      if result.fail_msg:
3798
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3799
                        node.name, result.fail_msg)
3800
        node_entry.append((constants.RS_NODATA, None))
3801
      else:
3802
        try:
3803
          self._CheckPayload(result)
3804
        except errors.OpExecError, err:
3805
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
3806
                          node.name, err)
3807
          node_entry.append((constants.RS_NODATA, None))
3808
        else:
3809
          if self.op.command == constants.OOB_HEALTH:
3810
            # For health we should log important events
3811
            for item, status in result.payload:
3812
              if status in [constants.OOB_STATUS_WARNING,
3813
                            constants.OOB_STATUS_CRITICAL]:
3814
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
3815
                                item, node.name, status)
3816

    
3817
          if self.op.command == constants.OOB_POWER_ON:
3818
            node.powered = True
3819
          elif self.op.command == constants.OOB_POWER_OFF:
3820
            node.powered = False
3821
          elif self.op.command == constants.OOB_POWER_STATUS:
3822
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3823
            if powered != node.powered:
3824
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3825
                               " match actual power state (%s)"), node.powered,
3826
                              node.name, powered)
3827

    
3828
          # For configuration changing commands we should update the node
3829
          if self.op.command in (constants.OOB_POWER_ON,
3830
                                 constants.OOB_POWER_OFF):
3831
            self.cfg.Update(node, feedback_fn)
3832

    
3833
          node_entry.append((constants.RS_NORMAL, result.payload))
3834

    
3835
          if (self.op.command == constants.OOB_POWER_ON and
3836
              idx < len(self.nodes) - 1):
3837
            time.sleep(self.op.power_delay)
3838

    
3839
    return ret
3840

    
3841
  def _CheckPayload(self, result):
3842
    """Checks if the payload is valid.
3843

3844
    @param result: RPC result
3845
    @raises errors.OpExecError: If payload is not valid
3846

3847
    """
3848
    errs = []
3849
    if self.op.command == constants.OOB_HEALTH:
3850
      if not isinstance(result.payload, list):
3851
        errs.append("command 'health' is expected to return a list but got %s" %
3852
                    type(result.payload))
3853
      else:
3854
        for item, status in result.payload:
3855
          if status not in constants.OOB_STATUSES:
3856
            errs.append("health item '%s' has invalid status '%s'" %
3857
                        (item, status))
3858

    
3859
    if self.op.command == constants.OOB_POWER_STATUS:
3860
      if not isinstance(result.payload, dict):
3861
        errs.append("power-status is expected to return a dict but got %s" %
3862
                    type(result.payload))
3863

    
3864
    if self.op.command in [
3865
        constants.OOB_POWER_ON,
3866
        constants.OOB_POWER_OFF,
3867
        constants.OOB_POWER_CYCLE,
3868
        ]:
3869
      if result.payload is not None:
3870
        errs.append("%s is expected to not return payload but got '%s'" %
3871
                    (self.op.command, result.payload))
3872

    
3873
    if errs:
3874
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3875
                               utils.CommaJoin(errs))
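# Result-shape sketch for LUOobCommand.Exec (toy values; assumes the
# power-status payload key is "powered"): one entry per node, each a list of
# (status, data) tuples, e.g. for "power-status" on two nodes:
#
#   [[(constants.RS_NORMAL, "node1"), (constants.RS_NORMAL, {"powered": True})],
#    [(constants.RS_NORMAL, "node2"), (constants.RS_UNAVAIL, None)]]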
3876

    
3877
class _OsQuery(_QueryBase):
3878
  FIELDS = query.OS_FIELDS
3879

    
3880
  def ExpandNames(self, lu):
3881
    # Lock all nodes in shared mode
3882
    # Temporary removal of locks, should be reverted later
3883
    # TODO: reintroduce locks when they are lighter-weight
3884
    lu.needed_locks = {}
3885
    #self.share_locks[locking.LEVEL_NODE] = 1
3886
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3887

    
3888
    # The following variables interact with _QueryBase._GetNames
3889
    if self.names:
3890
      self.wanted = self.names
3891
    else:
3892
      self.wanted = locking.ALL_SET
3893

    
3894
    self.do_locking = self.use_locking
3895

    
3896
  def DeclareLocks(self, lu, level):
3897
    pass
3898

    
3899
  @staticmethod
3900
  def _DiagnoseByOS(rlist):
3901
    """Remaps a per-node return list into an a per-os per-node dictionary
3902

3903
    @param rlist: a map with node names as keys and OS objects as values
3904

3905
    @rtype: dict
3906
    @return: a dictionary with osnames as keys and as value another
3907
        map, with nodes as keys and tuples of (path, status, diagnose,
3908
        variants, parameters, api_versions) as values, eg::
3909

3910
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3911
                                     (/srv/..., False, "invalid api")],
3912
                           "node2": [(/srv/..., True, "", [], [])]}
3913
          }
3914

3915
    """
3916
    all_os = {}
3917
    # we build here the list of nodes that didn't fail the RPC (at RPC
3918
    # level), so that nodes with a non-responding node daemon don't
3919
    # make all OSes invalid
3920
    good_nodes = [node_name for node_name in rlist
3921
                  if not rlist[node_name].fail_msg]
3922
    for node_name, nr in rlist.items():
3923
      if nr.fail_msg or not nr.payload:
3924
        continue
3925
      for (name, path, status, diagnose, variants,
3926
           params, api_versions) in nr.payload:
3927
        if name not in all_os:
3928
          # build a list of nodes for this os containing empty lists
3929
          # for each node in node_list
3930
          all_os[name] = {}
3931
          for nname in good_nodes:
3932
            all_os[name][nname] = []
3933
        # convert params from [name, help] to (name, help)
3934
        params = [tuple(v) for v in params]
3935
        all_os[name][node_name].append((path, status, diagnose,
3936
                                        variants, params, api_versions))
3937
    return all_os
3938

    
3939
  def _GetQueryData(self, lu):
3940
    """Computes the list of nodes and their attributes.
3941

3942
    """
3943
    # Locking is not used
3944
    assert not (compat.any(lu.glm.is_owned(level)
3945
                           for level in locking.LEVELS
3946
                           if level != locking.LEVEL_CLUSTER) or
3947
                self.do_locking or self.use_locking)
3948

    
3949
    valid_nodes = [node.name
3950
                   for node in lu.cfg.GetAllNodesInfo().values()
3951
                   if not node.offline and node.vm_capable]
3952
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3953
    cluster = lu.cfg.GetClusterInfo()
3954

    
3955
    data = {}
3956

    
3957
    for (os_name, os_data) in pol.items():
3958
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3959
                          hidden=(os_name in cluster.hidden_os),
3960
                          blacklisted=(os_name in cluster.blacklisted_os))
3961

    
3962
      variants = set()
3963
      parameters = set()
3964
      api_versions = set()
3965

    
3966
      for idx, osl in enumerate(os_data.values()):
3967
        info.valid = bool(info.valid and osl and osl[0][1])
3968
        if not info.valid:
3969
          break
3970

    
3971
        (node_variants, node_params, node_api) = osl[0][3:6]
3972
        if idx == 0:
3973
          # First entry
3974
          variants.update(node_variants)
3975
          parameters.update(node_params)
3976
          api_versions.update(node_api)
3977
        else:
3978
          # Filter out inconsistent values
3979
          variants.intersection_update(node_variants)
3980
          parameters.intersection_update(node_params)
3981
          api_versions.intersection_update(node_api)
3982

    
3983
      info.variants = list(variants)
3984
      info.parameters = list(parameters)
3985
      info.api_versions = list(api_versions)
3986

    
3987
      data[os_name] = info
3988

    
3989
    # Prepare data in requested order
3990
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3991
            if name in data]
3992

    
3993

    
3994
class LUOsDiagnose(NoHooksLU):
3995
  """Logical unit for OS diagnose/query.
3996

3997
  """
3998
  REQ_BGL = False
3999

    
4000
  @staticmethod
4001
  def _BuildFilter(fields, names):
4002
    """Builds a filter for querying OSes.
4003

4004
    """
4005
    name_filter = qlang.MakeSimpleFilter("name", names)
4006

    
4007
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4008
    # respective field is not requested
4009
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4010
                     for fname in ["hidden", "blacklisted"]
4011
                     if fname not in fields]
4012
    if "valid" not in fields:
4013
      status_filter.append([qlang.OP_TRUE, "valid"])
4014

    
4015
    if status_filter:
4016
      status_filter.insert(0, qlang.OP_AND)
4017
    else:
4018
      status_filter = None
4019

    
4020
    if name_filter and status_filter:
4021
      return [qlang.OP_AND, name_filter, status_filter]
4022
    elif name_filter:
4023
      return name_filter
4024
    else:
4025
      return status_filter
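  # Filter sketch (illustrative): for output_fields=["name"] and
  # names=["debootstrap"], _BuildFilter above produces roughly
  #
  #   [qlang.OP_AND,
  #    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debootstrap"]],
  #    [qlang.OP_AND,
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #     [qlang.OP_TRUE, "valid"]]]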
4026

    
4027
  def CheckArguments(self):
4028
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4029
                       self.op.output_fields, False)
4030

    
4031
  def ExpandNames(self):
4032
    self.oq.ExpandNames(self)
4033

    
4034
  def Exec(self, feedback_fn):
4035
    return self.oq.OldStyleQuery(self)
4036

    
4037

    
4038
class LUNodeRemove(LogicalUnit):
4039
  """Logical unit for removing a node.
4040

4041
  """
4042
  HPATH = "node-remove"
4043
  HTYPE = constants.HTYPE_NODE
4044

    
4045
  def BuildHooksEnv(self):
4046
    """Build hooks env.
4047

4048
    This doesn't run on the target node in the pre phase as a failed
4049
    node would then be impossible to remove.
4050

4051
    """
4052
    return {
4053
      "OP_TARGET": self.op.node_name,
4054
      "NODE_NAME": self.op.node_name,
4055
      }
4056

    
4057
  def BuildHooksNodes(self):
4058
    """Build hooks nodes.
4059

4060
    """
4061
    all_nodes = self.cfg.GetNodeList()
4062
    try:
4063
      all_nodes.remove(self.op.node_name)
4064
    except ValueError:
4065
      logging.warning("Node '%s', which is about to be removed, was not found"
4066
                      " in the list of all nodes", self.op.node_name)
4067
    return (all_nodes, all_nodes)
4068

    
4069
  def CheckPrereq(self):
4070
    """Check prerequisites.
4071

4072
    This checks:
4073
     - the node exists in the configuration
4074
     - it does not have primary or secondary instances
4075
     - it's not the master
4076

4077
    Any errors are signaled by raising errors.OpPrereqError.
4078

4079
    """
4080
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4081
    node = self.cfg.GetNodeInfo(self.op.node_name)
4082
    assert node is not None
4083

    
4084
    instance_list = self.cfg.GetInstanceList()
4085

    
4086
    masternode = self.cfg.GetMasterNode()
4087
    if node.name == masternode:
4088
      raise errors.OpPrereqError("Node is the master node, failover to another"
4089
                                 " node is required", errors.ECODE_INVAL)
4090

    
4091
    for instance_name in instance_list:
4092
      instance = self.cfg.GetInstanceInfo(instance_name)
4093
      if node.name in instance.all_nodes:
4094
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4095
                                   " please remove first" % instance_name,
4096
                                   errors.ECODE_INVAL)
4097
    self.op.node_name = node.name
4098
    self.node = node
4099

    
4100
  def Exec(self, feedback_fn):
4101
    """Removes the node from the cluster.
4102

4103
    """
4104
    node = self.node
4105
    logging.info("Stopping the node daemon and removing configs from node %s",
4106
                 node.name)
4107

    
4108
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4109

    
4110
    # Promote nodes to master candidate as needed
4111
    _AdjustCandidatePool(self, exceptions=[node.name])
4112
    self.context.RemoveNode(node.name)
4113

    
4114
    # Run post hooks on the node before it's removed
4115
    _RunPostHook(self, node.name)
4116

    
4117
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4118
    msg = result.fail_msg
4119
    if msg:
4120
      self.LogWarning("Errors encountered on the remote node while leaving"
4121
                      " the cluster: %s", msg)
4122

    
4123
    # Remove node from our /etc/hosts
4124
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4125
      master_node = self.cfg.GetMasterNode()
4126
      result = self.rpc.call_etc_hosts_modify(master_node,
4127
                                              constants.ETC_HOSTS_REMOVE,
4128
                                              node.name, None)
4129
      result.Raise("Can't update hosts file with new host data")
4130
      _RedistributeAncillaryFiles(self)
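  # Usage note (informational): this LU backs "gnt-node remove"; the order
  # above matters: master candidates are promoted first, the node is dropped
  # from the config/context, post hooks run while the node is still
  # reachable, and only then is the node daemon told to leave the cluster,
  # e.g.
  #
  #   gnt-node remove node4.example.com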
4131

    
4132

    
4133
class _NodeQuery(_QueryBase):
4134
  FIELDS = query.NODE_FIELDS
4135

    
4136
  def ExpandNames(self, lu):
4137
    lu.needed_locks = {}
4138
    lu.share_locks[locking.LEVEL_NODE] = 1
4139

    
4140
    if self.names:
4141
      self.wanted = _GetWantedNodes(lu, self.names)
4142
    else:
4143
      self.wanted = locking.ALL_SET
4144

    
4145
    self.do_locking = (self.use_locking and
4146
                       query.NQ_LIVE in self.requested_data)
4147

    
4148
    if self.do_locking:
4149
      # if we don't request only static fields, we need to lock the nodes
4150
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4151

    
4152
  def DeclareLocks(self, lu, level):
4153
    pass
4154

    
4155
  def _GetQueryData(self, lu):
4156
    """Computes the list of nodes and their attributes.
4157

4158
    """
4159
    all_info = lu.cfg.GetAllNodesInfo()
4160

    
4161
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4162

    
4163
    # Gather data as requested
4164
    if query.NQ_LIVE in self.requested_data:
4165
      # filter out non-vm_capable nodes
4166
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4167

    
4168
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4169
                                        lu.cfg.GetHypervisorType())
4170
      live_data = dict((name, nresult.payload)
4171
                       for (name, nresult) in node_data.items()
4172
                       if not nresult.fail_msg and nresult.payload)
4173
    else:
4174
      live_data = None
4175

    
4176
    if query.NQ_INST in self.requested_data:
4177
      node_to_primary = dict([(name, set()) for name in nodenames])
4178
      node_to_secondary = dict([(name, set()) for name in nodenames])
4179

    
4180
      inst_data = lu.cfg.GetAllInstancesInfo()
4181

    
4182
      for inst in inst_data.values():
4183
        if inst.primary_node in node_to_primary:
4184
          node_to_primary[inst.primary_node].add(inst.name)
4185
        for secnode in inst.secondary_nodes:
4186
          if secnode in node_to_secondary:
4187
            node_to_secondary[secnode].add(inst.name)
4188
    else:
4189
      node_to_primary = None
4190
      node_to_secondary = None
4191

    
4192
    if query.NQ_OOB in self.requested_data:
4193
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4194
                         for name, node in all_info.iteritems())
4195
    else:
4196
      oob_support = None
4197

    
4198
    if query.NQ_GROUP in self.requested_data:
4199
      groups = lu.cfg.GetAllNodeGroupsInfo()
4200
    else:
4201
      groups = {}
4202

    
4203
    return query.NodeQueryData([all_info[name] for name in nodenames],
4204
                               live_data, lu.cfg.GetMasterNode(),
4205
                               node_to_primary, node_to_secondary, groups,
4206
                               oob_support, lu.cfg.GetClusterInfo())
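  # Usage note (informational): _NodeQuery ultimately serves "gnt-node list";
  # requesting live fields is what triggers the node_info RPC above, e.g.
  #
  #   gnt-node list -o name,mtotal,mfree,dtotal,dfree,pinst_cnt,sinst_cnt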
4207

    
4208

    
4209
class LUNodeQuery(NoHooksLU):
4210
  """Logical unit for querying nodes.
4211

4212
  """
4213
  # pylint: disable-msg=W0142
4214
  REQ_BGL = False
4215

    
4216
  def CheckArguments(self):
4217
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4218
                         self.op.output_fields, self.op.use_locking)
4219

    
4220
  def ExpandNames(self):
4221
    self.nq.ExpandNames(self)
4222

    
4223
  def Exec(self, feedback_fn):
4224
    return self.nq.OldStyleQuery(self)
4225

    
4226

    
4227
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()

    vol2inst = dict(((node, vol), inst.name)
                    for inst in ilist.values()
                    for (node, vols) in inst.MapLVsByNode().items()
                    for vol in vols)

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


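# _InstanceQuery is the instance counterpart of _NodeQuery above: it expands
# names and locks and gathers per-instance data for the generic query machinery.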
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)


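# LUQuery and LUQueryFields implement the generic query opcodes: the resource
# kind in self.op.what is mapped to one of the _*Query classes (via
# _GetQueryImplementation), which then does the real work.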
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


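# LUNodeAdd handles both adding a brand-new node and re-adding an existing one
# (op.readd); most of the differences between the two cases are resolved in
# CheckPrereq below.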
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

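  # Example: the flag tuple (True, False, False) -- master candidate, not
  # drained, not offline -- maps to _ROLE_CANDIDATE; _R2F is simply the
  # inverse mapping, used in Exec to turn the new role back into flags.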
  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


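# LUClusterQuery only reads the in-memory cluster configuration, so it can run
# without taking any locks (needed_locks stays empty).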
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


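# Illustrative call (see LUInstanceActivateDisks.Exec above): the helper
# returns a (disks_ok, device_info) pair and the caller turns a False
# disks_ok into an OpExecError.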
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a pair (disks_ok, device_info); device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


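# Unlike _SafeShutdownInstanceDisks above, _ShutdownInstanceDisks does not
# verify that the instance is stopped; callers that need that guarantee must
# check it themselves (or use the _Safe variant).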
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  (only logged); otherwise they make the function report failure.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


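# Illustrative use of the check below: before starting or moving an instance,
# callers do something like
#   _CheckNodeFreeMemory(self, node, "starting instance %s" % iname,
#                        needed_mib, instance.hypervisor)
# where needed_mib is the instance's configured memory in MiB.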
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


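# req_sizes maps a volume group name to the space needed in it, in MiB; an
# illustrative value would be {"xenvg": 10240} for 10 GiB in VG "xenvg".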
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
5602
  """Starts an instance.
5603

5604
  """
5605
  HPATH = "instance-start"
5606
  HTYPE = constants.HTYPE_INSTANCE
5607
  REQ_BGL = False
5608

    
5609
  def CheckArguments(self):
5610
    # extra beparams
5611
    if self.op.beparams:
5612
      # fill the beparams dict
5613
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5614

    
5615
  def ExpandNames(self):
5616
    self._ExpandAndLockInstance()
5617

    
5618
  def BuildHooksEnv(self):
5619
    """Build hooks env.
5620

5621
    This runs on master, primary and secondary nodes of the instance.
5622

5623
    """
5624
    env = {
5625
      "FORCE": self.op.force,
5626
      }
5627

    
5628
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5629

    
5630
    return env
5631

    
5632
  def BuildHooksNodes(self):
5633
    """Build hooks nodes.
5634

5635
    """
5636
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5637
    return (nl, nl)
5638

    
5639
  def CheckPrereq(self):
5640
    """Check prerequisites.
5641

5642
    This checks that the instance is in the cluster.
5643

5644
    """
5645
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5646
    assert self.instance is not None, \
5647
      "Cannot retrieve locked instance %s" % self.op.instance_name
5648

    
5649
    # extra hvparams
5650
    if self.op.hvparams:
5651
      # check hypervisor parameter syntax (locally)
5652
      cluster = self.cfg.GetClusterInfo()
5653
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5654
      filled_hvp = cluster.FillHV(instance)
5655
      filled_hvp.update(self.op.hvparams)
5656
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5657
      hv_type.CheckParameterSyntax(filled_hvp)
5658
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5659

    
5660
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5661

    
5662
    if self.primary_offline and self.op.ignore_offline_nodes:
5663
      self.proc.LogWarning("Ignoring offline primary node")
5664

    
5665
      if self.op.hvparams or self.op.beparams:
5666
        self.proc.LogWarning("Overridden parameters are ignored")
5667
    else:
5668
      _CheckNodeOnline(self, instance.primary_node)
5669

    
5670
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5671

    
5672
      # check bridges existence
5673
      _CheckInstanceBridgesExist(self, instance)
5674

    
5675
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5676
                                                instance.name,
5677
                                                instance.hypervisor)
5678
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5679
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5680
      if not remote_info.payload: # not running already
5681
        _CheckNodeFreeMemory(self, instance.primary_node,
5682
                             "starting instance %s" % instance.name,
5683
                             bep[constants.BE_MEMORY], instance.hypervisor)
5684

    
5685
  def Exec(self, feedback_fn):
5686
    """Start the instance.
5687

5688
    """
5689
    instance = self.instance
5690
    force = self.op.force
5691

    
5692
    if not self.op.no_remember:
5693
      self.cfg.MarkInstanceUp(instance.name)
5694

    
5695
    if self.primary_offline:
5696
      assert self.op.ignore_offline_nodes
5697
      self.proc.LogInfo("Primary node offline, marked instance as started")
5698
    else:
5699
      node_current = instance.primary_node
5700

    
5701
      _StartInstanceDisks(self, instance, force)
5702

    
5703
      result = self.rpc.call_instance_start(node_current, instance,
5704
                                            self.op.hvparams, self.op.beparams,
5705
                                            self.op.startup_paused)
5706
      msg = result.fail_msg
5707
      if msg:
5708
        _ShutdownInstanceDisks(self, instance)
5709
        raise errors.OpExecError("Could not start instance: %s" % msg)
5710

    
5711

    
5712
class LUInstanceReboot(LogicalUnit):
5713
  """Reboot an instance.
5714

5715
  """
5716
  HPATH = "instance-reboot"
5717
  HTYPE = constants.HTYPE_INSTANCE
5718
  REQ_BGL = False
5719

    
5720
  def ExpandNames(self):
5721
    self._ExpandAndLockInstance()
5722

    
5723
  def BuildHooksEnv(self):
5724
    """Build hooks env.
5725

5726
    This runs on master, primary and secondary nodes of the instance.
5727

5728
    """
5729
    env = {
5730
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5731
      "REBOOT_TYPE": self.op.reboot_type,
5732
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5733
      }
5734

    
5735
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5736

    
5737
    return env
5738

    
5739
  def BuildHooksNodes(self):
5740
    """Build hooks nodes.
5741

5742
    """
5743
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5744
    return (nl, nl)
5745

    
5746
  def CheckPrereq(self):
5747
    """Check prerequisites.
5748

5749
    This checks that the instance is in the cluster.
5750

5751
    """
5752
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5753
    assert self.instance is not None, \
5754
      "Cannot retrieve locked instance %s" % self.op.instance_name
5755

    
5756
    _CheckNodeOnline(self, instance.primary_node)
5757

    
5758
    # check bridges existence
5759
    _CheckInstanceBridgesExist(self, instance)
5760

    
5761
  def Exec(self, feedback_fn):
5762
    """Reboot the instance.
5763

5764
    """
5765
    instance = self.instance
5766
    ignore_secondaries = self.op.ignore_secondaries
5767
    reboot_type = self.op.reboot_type
5768

    
5769
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5770
                                              instance.name,
5771
                                              instance.hypervisor)
5772
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5773
    instance_running = bool(remote_info.payload)
5774

    
5775
    node_current = instance.primary_node
5776

    
5777
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5778
                                            constants.INSTANCE_REBOOT_HARD]:
5779
      for disk in instance.disks:
5780
        self.cfg.SetDiskID(disk, node_current)
5781
      result = self.rpc.call_instance_reboot(node_current, instance,
5782
                                             reboot_type,
5783
                                             self.op.shutdown_timeout)
5784
      result.Raise("Could not reboot instance")
5785
    else:
5786
      if instance_running:
5787
        result = self.rpc.call_instance_shutdown(node_current, instance,
5788
                                                 self.op.shutdown_timeout)
5789
        result.Raise("Could not shutdown instance for full reboot")
5790
        _ShutdownInstanceDisks(self, instance)
5791
      else:
5792
        self.LogInfo("Instance %s was already stopped, starting now",
5793
                     instance.name)
5794
      _StartInstanceDisks(self, instance, ignore_secondaries)
5795
      result = self.rpc.call_instance_start(node_current, instance,
5796
                                            None, None, False)
5797
      msg = result.fail_msg
5798
      if msg:
5799
        _ShutdownInstanceDisks(self, instance)
5800
        raise errors.OpExecError("Could not start instance for"
5801
                                 " full reboot: %s" % msg)
5802

    
5803
    self.cfg.MarkInstanceUp(instance.name)
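
# Illustrative sketch (not part of the original module): the helper below
# restates the decision made in LUInstanceReboot.Exec above; its name is
# hypothetical.
def _ExampleNeedsFullReboot(reboot_type, instance_running):
  """Tell whether a reboot request needs a full shutdown/start cycle.

  Soft and hard reboots of a running instance are delegated to the
  hypervisor; everything else (including rebooting a stopped instance)
  is handled as a full stop/start.

  """
  hv_handled = [constants.INSTANCE_REBOOT_SOFT, constants.INSTANCE_REBOOT_HARD]
  return not (instance_running and reboot_type in hv_handled)
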
5804

    
5805

    
5806
class LUInstanceShutdown(LogicalUnit):
5807
  """Shutdown an instance.
5808

5809
  """
5810
  HPATH = "instance-stop"
5811
  HTYPE = constants.HTYPE_INSTANCE
5812
  REQ_BGL = False
5813

    
5814
  def ExpandNames(self):
5815
    self._ExpandAndLockInstance()
5816

    
5817
  def BuildHooksEnv(self):
5818
    """Build hooks env.
5819

5820
    This runs on master, primary and secondary nodes of the instance.
5821

5822
    """
5823
    env = _BuildInstanceHookEnvByObject(self, self.instance)
5824
    env["TIMEOUT"] = self.op.timeout
5825
    return env
5826

    
5827
  def BuildHooksNodes(self):
5828
    """Build hooks nodes.
5829

5830
    """
5831
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5832
    return (nl, nl)
5833

    
5834
  def CheckPrereq(self):
5835
    """Check prerequisites.
5836

5837
    This checks that the instance is in the cluster.
5838

5839
    """
5840
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5841
    assert self.instance is not None, \
5842
      "Cannot retrieve locked instance %s" % self.op.instance_name
5843

    
5844
    self.primary_offline = \
5845
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
5846

    
5847
    if self.primary_offline and self.op.ignore_offline_nodes:
5848
      self.proc.LogWarning("Ignoring offline primary node")
5849
    else:
5850
      _CheckNodeOnline(self, self.instance.primary_node)
5851

    
5852
  def Exec(self, feedback_fn):
5853
    """Shutdown the instance.
5854

5855
    """
5856
    instance = self.instance
5857
    node_current = instance.primary_node
5858
    timeout = self.op.timeout
5859

    
5860
    if not self.op.no_remember:
5861
      self.cfg.MarkInstanceDown(instance.name)
5862

    
5863
    if self.primary_offline:
5864
      assert self.op.ignore_offline_nodes
5865
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
5866
    else:
5867
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5868
      msg = result.fail_msg
5869
      if msg:
5870
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5871

    
5872
      _ShutdownInstanceDisks(self, instance)
5873

    
5874

    
5875
class LUInstanceReinstall(LogicalUnit):
5876
  """Reinstall an instance.
5877

5878
  """
5879
  HPATH = "instance-reinstall"
5880
  HTYPE = constants.HTYPE_INSTANCE
5881
  REQ_BGL = False
5882

    
5883
  def ExpandNames(self):
5884
    self._ExpandAndLockInstance()
5885

    
5886
  def BuildHooksEnv(self):
5887
    """Build hooks env.
5888

5889
    This runs on master, primary and secondary nodes of the instance.
5890

5891
    """
5892
    return _BuildInstanceHookEnvByObject(self, self.instance)
5893

    
5894
  def BuildHooksNodes(self):
5895
    """Build hooks nodes.
5896

5897
    """
5898
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5899
    return (nl, nl)
5900

    
5901
  def CheckPrereq(self):
5902
    """Check prerequisites.
5903

5904
    This checks that the instance is in the cluster and is not running.
5905

5906
    """
5907
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5908
    assert instance is not None, \
5909
      "Cannot retrieve locked instance %s" % self.op.instance_name
5910
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5911
                     " offline, cannot reinstall")
5912
    for node in instance.secondary_nodes:
5913
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
5914
                       " cannot reinstall")
5915

    
5916
    if instance.disk_template == constants.DT_DISKLESS:
5917
      raise errors.OpPrereqError("Instance '%s' has no disks" %
5918
                                 self.op.instance_name,
5919
                                 errors.ECODE_INVAL)
5920
    _CheckInstanceDown(self, instance, "cannot reinstall")
5921

    
5922
    if self.op.os_type is not None:
5923
      # OS verification
5924
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5925
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5926
      instance_os = self.op.os_type
5927
    else:
5928
      instance_os = instance.os
5929

    
5930
    nodelist = list(instance.all_nodes)
5931

    
5932
    if self.op.osparams:
5933
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5934
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5935
      self.os_inst = i_osdict # the new dict (without defaults)
5936
    else:
5937
      self.os_inst = None
5938

    
5939
    self.instance = instance
5940

    
5941
  def Exec(self, feedback_fn):
5942
    """Reinstall the instance.
5943

5944
    """
5945
    inst = self.instance
5946

    
5947
    if self.op.os_type is not None:
5948
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5949
      inst.os = self.op.os_type
5950
      # Write to configuration
5951
      self.cfg.Update(inst, feedback_fn)
5952

    
5953
    _StartInstanceDisks(self, inst, None)
5954
    try:
5955
      feedback_fn("Running the instance OS create scripts...")
5956
      # FIXME: pass debug option from opcode to backend
5957
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5958
                                             self.op.debug_level,
5959
                                             osparams=self.os_inst)
5960
      result.Raise("Could not install OS for instance %s on node %s" %
5961
                   (inst.name, inst.primary_node))
5962
    finally:
5963
      _ShutdownInstanceDisks(self, inst)
5964

    
5965

    
5966
class LUInstanceRecreateDisks(LogicalUnit):
5967
  """Recreate an instance's missing disks.
5968

5969
  """
5970
  HPATH = "instance-recreate-disks"
5971
  HTYPE = constants.HTYPE_INSTANCE
5972
  REQ_BGL = False
5973

    
5974
  def CheckArguments(self):
5975
    # normalise the disk list
5976
    self.op.disks = sorted(frozenset(self.op.disks))
5977

    
5978
  def ExpandNames(self):
5979
    self._ExpandAndLockInstance()
5980
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5981
    if self.op.nodes:
5982
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
5983
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
5984
    else:
5985
      self.needed_locks[locking.LEVEL_NODE] = []
5986

    
5987
  def DeclareLocks(self, level):
5988
    if level == locking.LEVEL_NODE:
5989
      # if we replace the nodes, we only need to lock the old primary,
5990
      # otherwise we need to lock all nodes for disk re-creation
5991
      primary_only = bool(self.op.nodes)
5992
      self._LockInstancesNodes(primary_only=primary_only)
5993

    
5994
  def BuildHooksEnv(self):
5995
    """Build hooks env.
5996

5997
    This runs on master, primary and secondary nodes of the instance.
5998

5999
    """
6000
    return _BuildInstanceHookEnvByObject(self, self.instance)
6001

    
6002
  def BuildHooksNodes(self):
6003
    """Build hooks nodes.
6004

6005
    """
6006
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6007
    return (nl, nl)
6008

    
6009
  def CheckPrereq(self):
6010
    """Check prerequisites.
6011

6012
    This checks that the instance is in the cluster and is not running.
6013

6014
    """
6015
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6016
    assert instance is not None, \
6017
      "Cannot retrieve locked instance %s" % self.op.instance_name
6018
    if self.op.nodes:
6019
      if len(self.op.nodes) != len(instance.all_nodes):
6020
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6021
                                   " %d replacement nodes were specified" %
6022
                                   (instance.name, len(instance.all_nodes),
6023
                                    len(self.op.nodes)),
6024
                                   errors.ECODE_INVAL)
6025
      assert instance.disk_template != constants.DT_DRBD8 or \
6026
          len(self.op.nodes) == 2
6027
      assert instance.disk_template != constants.DT_PLAIN or \
6028
          len(self.op.nodes) == 1
6029
      primary_node = self.op.nodes[0]
6030
    else:
6031
      primary_node = instance.primary_node
6032
    _CheckNodeOnline(self, primary_node)
6033

    
6034
    if instance.disk_template == constants.DT_DISKLESS:
6035
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6036
                                 self.op.instance_name, errors.ECODE_INVAL)
6037
    # if we replace nodes *and* the old primary is offline, we don't
6038
    # check
6039
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6040
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6041
    if not (self.op.nodes and old_pnode.offline):
6042
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6043

    
6044
    if not self.op.disks:
6045
      self.op.disks = range(len(instance.disks))
6046
    else:
6047
      for idx in self.op.disks:
6048
        if idx >= len(instance.disks):
6049
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6050
                                     errors.ECODE_INVAL)
6051
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6052
      raise errors.OpPrereqError("Can't recreate disks partially and"
6053
                                 " change the nodes at the same time",
6054
                                 errors.ECODE_INVAL)
6055
    self.instance = instance
6056

    
6057
  def Exec(self, feedback_fn):
6058
    """Recreate the disks.
6059

6060
    """
6061
    instance = self.instance
6062

    
6063
    to_skip = []
6064
    mods = [] # keeps track of needed logical_id changes
6065

    
6066
    for idx, disk in enumerate(instance.disks):
6067
      if idx not in self.op.disks: # disk idx has not been passed in
6068
        to_skip.append(idx)
6069
        continue
6070
      # update secondaries for disks, if needed
6071
      if self.op.nodes:
6072
        if disk.dev_type == constants.LD_DRBD8:
6073
          # need to update the nodes and minors
6074
          assert len(self.op.nodes) == 2
6075
          assert len(disk.logical_id) == 6 # otherwise disk internals
6076
                                           # have changed
6077
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6078
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6079
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6080
                    new_minors[0], new_minors[1], old_secret)
6081
          assert len(disk.logical_id) == len(new_id)
6082
          mods.append((idx, new_id))
6083

    
6084
    # now that we have passed all asserts above, we can apply the mods
6085
    # in a single run (to avoid partial changes)
6086
    for idx, new_id in mods:
6087
      instance.disks[idx].logical_id = new_id
6088

    
6089
    # change primary node, if needed
6090
    if self.op.nodes:
6091
      instance.primary_node = self.op.nodes[0]
6092
      self.LogWarning("Changing the instance's nodes, you will have to"
6093
                      " remove any disks left on the older nodes manually")
6094

    
6095
    if self.op.nodes:
6096
      self.cfg.Update(instance, feedback_fn)
6097

    
6098
    _CreateDisks(self, instance, to_skip=to_skip)
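
# Illustrative sketch (not part of the original module): the helper below
# mirrors the logical_id manipulation in LUInstanceRecreateDisks.Exec above,
# where a DRBD8 logical_id is handled as the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); only the nodes and the
# minors change when the disks are recreated elsewhere.  The helper name and
# its arguments are hypothetical.
def _ExampleRebuildDrbd8LogicalId(old_id, new_nodes, new_minors):
  """Build a new DRBD8 logical_id for replacement nodes (example only).

  """
  (_, _, old_port, _, _, old_secret) = old_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)
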
6099

    
6100

    
6101
class LUInstanceRename(LogicalUnit):
6102
  """Rename an instance.
6103

6104
  """
6105
  HPATH = "instance-rename"
6106
  HTYPE = constants.HTYPE_INSTANCE
6107

    
6108
  def CheckArguments(self):
6109
    """Check arguments.
6110

6111
    """
6112
    if self.op.ip_check and not self.op.name_check:
6113
      # TODO: make the ip check more flexible and not depend on the name check
6114
      raise errors.OpPrereqError("IP address check requires a name check",
6115
                                 errors.ECODE_INVAL)
6116

    
6117
  def BuildHooksEnv(self):
6118
    """Build hooks env.
6119

6120
    This runs on master, primary and secondary nodes of the instance.
6121

6122
    """
6123
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6124
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6125
    return env
6126

    
6127
  def BuildHooksNodes(self):
6128
    """Build hooks nodes.
6129

6130
    """
6131
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6132
    return (nl, nl)
6133

    
6134
  def CheckPrereq(self):
6135
    """Check prerequisites.
6136

6137
    This checks that the instance is in the cluster and is not running.
6138

6139
    """
6140
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6141
                                                self.op.instance_name)
6142
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6143
    assert instance is not None
6144
    _CheckNodeOnline(self, instance.primary_node)
6145
    _CheckInstanceDown(self, instance, "cannot rename")
6146
    self.instance = instance
6147

    
6148
    new_name = self.op.new_name
6149
    if self.op.name_check:
6150
      hostname = netutils.GetHostname(name=new_name)
6151
      if hostname != new_name:
6152
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6153
                     hostname.name)
6154
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6155
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6156
                                    " same as given hostname '%s'") %
6157
                                    (hostname.name, self.op.new_name),
6158
                                    errors.ECODE_INVAL)
6159
      new_name = self.op.new_name = hostname.name
6160
      if (self.op.ip_check and
6161
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6162
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6163
                                   (hostname.ip, new_name),
6164
                                   errors.ECODE_NOTUNIQUE)
6165

    
6166
    instance_list = self.cfg.GetInstanceList()
6167
    if new_name in instance_list and new_name != instance.name:
6168
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6169
                                 new_name, errors.ECODE_EXISTS)
6170

    
6171
  def Exec(self, feedback_fn):
6172
    """Rename the instance.
6173

6174
    """
6175
    inst = self.instance
6176
    old_name = inst.name
6177

    
6178
    rename_file_storage = False
6179
    if (inst.disk_template in constants.DTS_FILEBASED and
6180
        self.op.new_name != inst.name):
6181
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6182
      rename_file_storage = True
6183

    
6184
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6185
    # Change the instance lock. This is definitely safe while we hold the BGL.
6186
    # Otherwise the new lock would have to be added in acquired mode.
6187
    assert self.REQ_BGL
6188
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6189
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6190

    
6191
    # re-read the instance from the configuration after rename
6192
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6193

    
6194
    if rename_file_storage:
6195
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6196
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6197
                                                     old_file_storage_dir,
6198
                                                     new_file_storage_dir)
6199
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6200
                   " (but the instance has been renamed in Ganeti)" %
6201
                   (inst.primary_node, old_file_storage_dir,
6202
                    new_file_storage_dir))
6203

    
6204
    _StartInstanceDisks(self, inst, None)
6205
    try:
6206
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6207
                                                 old_name, self.op.debug_level)
6208
      msg = result.fail_msg
6209
      if msg:
6210
        msg = ("Could not run OS rename script for instance %s on node %s"
6211
               " (but the instance has been renamed in Ganeti): %s" %
6212
               (inst.name, inst.primary_node, msg))
6213
        self.proc.LogWarning(msg)
6214
    finally:
6215
      _ShutdownInstanceDisks(self, inst)
6216

    
6217
    return inst.name
6218

    
6219

    
6220
class LUInstanceRemove(LogicalUnit):
6221
  """Remove an instance.
6222

6223
  """
6224
  HPATH = "instance-remove"
6225
  HTYPE = constants.HTYPE_INSTANCE
6226
  REQ_BGL = False
6227

    
6228
  def ExpandNames(self):
6229
    self._ExpandAndLockInstance()
6230
    self.needed_locks[locking.LEVEL_NODE] = []
6231
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6232

    
6233
  def DeclareLocks(self, level):
6234
    if level == locking.LEVEL_NODE:
6235
      self._LockInstancesNodes()
6236

    
6237
  def BuildHooksEnv(self):
6238
    """Build hooks env.
6239

6240
    This runs on master, primary and secondary nodes of the instance.
6241

6242
    """
6243
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6244
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6245
    return env
6246

    
6247
  def BuildHooksNodes(self):
6248
    """Build hooks nodes.
6249

6250
    """
6251
    nl = [self.cfg.GetMasterNode()]
6252
    nl_post = list(self.instance.all_nodes) + nl
6253
    return (nl, nl_post)
6254

    
6255
  def CheckPrereq(self):
6256
    """Check prerequisites.
6257

6258
    This checks that the instance is in the cluster.
6259

6260
    """
6261
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6262
    assert self.instance is not None, \
6263
      "Cannot retrieve locked instance %s" % self.op.instance_name
6264

    
6265
  def Exec(self, feedback_fn):
6266
    """Remove the instance.
6267

6268
    """
6269
    instance = self.instance
6270
    logging.info("Shutting down instance %s on node %s",
6271
                 instance.name, instance.primary_node)
6272

    
6273
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6274
                                             self.op.shutdown_timeout)
6275
    msg = result.fail_msg
6276
    if msg:
6277
      if self.op.ignore_failures:
6278
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6279
      else:
6280
        raise errors.OpExecError("Could not shutdown instance %s on"
6281
                                 " node %s: %s" %
6282
                                 (instance.name, instance.primary_node, msg))
6283

    
6284
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6285

    
6286

    
6287
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6288
  """Utility function to remove an instance.
6289

6290
  """
6291
  logging.info("Removing block devices for instance %s", instance.name)
6292

    
6293
  if not _RemoveDisks(lu, instance):
6294
    if not ignore_failures:
6295
      raise errors.OpExecError("Can't remove instance's disks")
6296
    feedback_fn("Warning: can't remove instance's disks")
6297

    
6298
  logging.info("Removing instance %s out of cluster config", instance.name)
6299

    
6300
  lu.cfg.RemoveInstance(instance.name)
6301

    
6302
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6303
    "Instance lock removal conflict"
6304

    
6305
  # Remove lock for the instance
6306
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6307

    
6308

    
6309
class LUInstanceQuery(NoHooksLU):
6310
  """Logical unit for querying instances.
6311

6312
  """
6313
  # pylint: disable-msg=W0142
6314
  REQ_BGL = False
6315

    
6316
  def CheckArguments(self):
6317
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6318
                             self.op.output_fields, self.op.use_locking)
6319

    
6320
  def ExpandNames(self):
6321
    self.iq.ExpandNames(self)
6322

    
6323
  def DeclareLocks(self, level):
6324
    self.iq.DeclareLocks(self, level)
6325

    
6326
  def Exec(self, feedback_fn):
6327
    return self.iq.OldStyleQuery(self)
6328

    
6329

    
6330
class LUInstanceFailover(LogicalUnit):
6331
  """Failover an instance.
6332

6333
  """
6334
  HPATH = "instance-failover"
6335
  HTYPE = constants.HTYPE_INSTANCE
6336
  REQ_BGL = False
6337

    
6338
  def CheckArguments(self):
6339
    """Check the arguments.
6340

6341
    """
6342
    self.iallocator = getattr(self.op, "iallocator", None)
6343
    self.target_node = getattr(self.op, "target_node", None)
6344

    
6345
  def ExpandNames(self):
6346
    self._ExpandAndLockInstance()
6347

    
6348
    if self.op.target_node is not None:
6349
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6350

    
6351
    self.needed_locks[locking.LEVEL_NODE] = []
6352
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6353

    
6354
    ignore_consistency = self.op.ignore_consistency
6355
    shutdown_timeout = self.op.shutdown_timeout
6356
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6357
                                       cleanup=False,
6358
                                       failover=True,
6359
                                       ignore_consistency=ignore_consistency,
6360
                                       shutdown_timeout=shutdown_timeout)
6361
    self.tasklets = [self._migrater]
6362

    
6363
  def DeclareLocks(self, level):
6364
    if level == locking.LEVEL_NODE:
6365
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6366
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6367
        if self.op.target_node is None:
6368
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6369
        else:
6370
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6371
                                                   self.op.target_node]
6372
        del self.recalculate_locks[locking.LEVEL_NODE]
6373
      else:
6374
        self._LockInstancesNodes()
6375

    
6376
  def BuildHooksEnv(self):
6377
    """Build hooks env.
6378

6379
    This runs on master, primary and secondary nodes of the instance.
6380

6381
    """
6382
    instance = self._migrater.instance
6383
    source_node = instance.primary_node
6384
    target_node = self.op.target_node
6385
    env = {
6386
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6387
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6388
      "OLD_PRIMARY": source_node,
6389
      "NEW_PRIMARY": target_node,
6390
      }
6391

    
6392
    if instance.disk_template in constants.DTS_INT_MIRROR:
6393
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6394
      env["NEW_SECONDARY"] = source_node
6395
    else:
6396
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6397

    
6398
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6399

    
6400
    return env
6401

    
6402
  def BuildHooksNodes(self):
6403
    """Build hooks nodes.
6404

6405
    """
6406
    instance = self._migrater.instance
6407
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6408
    return (nl, nl + [instance.primary_node])
6409

    
6410

    
6411
class LUInstanceMigrate(LogicalUnit):
6412
  """Migrate an instance.
6413

6414
  This is migration without shutting down, compared to the failover,
6415
  which is done with shutdown.
6416

6417
  """
6418
  HPATH = "instance-migrate"
6419
  HTYPE = constants.HTYPE_INSTANCE
6420
  REQ_BGL = False
6421

    
6422
  def ExpandNames(self):
6423
    self._ExpandAndLockInstance()
6424

    
6425
    if self.op.target_node is not None:
6426
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6427

    
6428
    self.needed_locks[locking.LEVEL_NODE] = []
6429
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6430

    
6431
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6432
                                       cleanup=self.op.cleanup,
6433
                                       failover=False,
6434
                                       fallback=self.op.allow_failover)
6435
    self.tasklets = [self._migrater]
6436

    
6437
  def DeclareLocks(self, level):
6438
    if level == locking.LEVEL_NODE:
6439
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6440
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6441
        if self.op.target_node is None:
6442
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6443
        else:
6444
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6445
                                                   self.op.target_node]
6446
        del self.recalculate_locks[locking.LEVEL_NODE]
6447
      else:
6448
        self._LockInstancesNodes()
6449

    
6450
  def BuildHooksEnv(self):
6451
    """Build hooks env.
6452

6453
    This runs on master, primary and secondary nodes of the instance.
6454

6455
    """
6456
    instance = self._migrater.instance
6457
    source_node = instance.primary_node
6458
    target_node = self.op.target_node
6459
    env = _BuildInstanceHookEnvByObject(self, instance)
6460
    env.update({
6461
      "MIGRATE_LIVE": self._migrater.live,
6462
      "MIGRATE_CLEANUP": self.op.cleanup,
6463
      "OLD_PRIMARY": source_node,
6464
      "NEW_PRIMARY": target_node,
6465
      })
6466

    
6467
    if instance.disk_template in constants.DTS_INT_MIRROR:
6468
      env["OLD_SECONDARY"] = target_node
6469
      env["NEW_SECONDARY"] = source_node
6470
    else:
6471
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6472

    
6473
    return env
6474

    
6475
  def BuildHooksNodes(self):
6476
    """Build hooks nodes.
6477

6478
    """
6479
    instance = self._migrater.instance
6480
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6481
    return (nl, nl + [instance.primary_node])
6482

    
6483

    
6484
class LUInstanceMove(LogicalUnit):
6485
  """Move an instance by data-copying.
6486

6487
  """
6488
  HPATH = "instance-move"
6489
  HTYPE = constants.HTYPE_INSTANCE
6490
  REQ_BGL = False
6491

    
6492
  def ExpandNames(self):
6493
    self._ExpandAndLockInstance()
6494
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6495
    self.op.target_node = target_node
6496
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6497
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6498

    
6499
  def DeclareLocks(self, level):
6500
    if level == locking.LEVEL_NODE:
6501
      self._LockInstancesNodes(primary_only=True)
6502

    
6503
  def BuildHooksEnv(self):
6504
    """Build hooks env.
6505

6506
    This runs on master, primary and secondary nodes of the instance.
6507

6508
    """
6509
    env = {
6510
      "TARGET_NODE": self.op.target_node,
6511
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6512
      }
6513
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6514
    return env
6515

    
6516
  def BuildHooksNodes(self):
6517
    """Build hooks nodes.
6518

6519
    """
6520
    nl = [
6521
      self.cfg.GetMasterNode(),
6522
      self.instance.primary_node,
6523
      self.op.target_node,
6524
      ]
6525
    return (nl, nl)
6526

    
6527
  def CheckPrereq(self):
6528
    """Check prerequisites.
6529

6530
    This checks that the instance is in the cluster.
6531

6532
    """
6533
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6534
    assert self.instance is not None, \
6535
      "Cannot retrieve locked instance %s" % self.op.instance_name
6536

    
6537
    node = self.cfg.GetNodeInfo(self.op.target_node)
6538
    assert node is not None, \
6539
      "Cannot retrieve locked node %s" % self.op.target_node
6540

    
6541
    self.target_node = target_node = node.name
6542

    
6543
    if target_node == instance.primary_node:
6544
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6545
                                 (instance.name, target_node),
6546
                                 errors.ECODE_STATE)
6547

    
6548
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6549

    
6550
    for idx, dsk in enumerate(instance.disks):
6551
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6552
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6553
                                   " cannot copy" % idx, errors.ECODE_STATE)
6554

    
6555
    _CheckNodeOnline(self, target_node)
6556
    _CheckNodeNotDrained(self, target_node)
6557
    _CheckNodeVmCapable(self, target_node)
6558

    
6559
    if instance.admin_up:
6560
      # check memory requirements on the target node
6561
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6562
                           instance.name, bep[constants.BE_MEMORY],
6563
                           instance.hypervisor)
6564
    else:
6565
      self.LogInfo("Not checking memory on the secondary node as"
6566
                   " instance will not be started")
6567

    
6568
    # check bridge existence
6569
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6570

    
6571
  def Exec(self, feedback_fn):
6572
    """Move an instance.
6573

6574
    The move is done by shutting it down on its present node, copying
6575
    the data over (slow) and starting it on the new node.
6576

6577
    """
6578
    instance = self.instance
6579

    
6580
    source_node = instance.primary_node
6581
    target_node = self.target_node
6582

    
6583
    self.LogInfo("Shutting down instance %s on source node %s",
6584
                 instance.name, source_node)
6585

    
6586
    result = self.rpc.call_instance_shutdown(source_node, instance,
6587
                                             self.op.shutdown_timeout)
6588
    msg = result.fail_msg
6589
    if msg:
6590
      if self.op.ignore_consistency:
6591
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6592
                             " Proceeding anyway. Please make sure node"
6593
                             " %s is down. Error details: %s",
6594
                             instance.name, source_node, source_node, msg)
6595
      else:
6596
        raise errors.OpExecError("Could not shutdown instance %s on"
6597
                                 " node %s: %s" %
6598
                                 (instance.name, source_node, msg))
6599

    
6600
    # create the target disks
6601
    try:
6602
      _CreateDisks(self, instance, target_node=target_node)
6603
    except errors.OpExecError:
6604
      self.LogWarning("Device creation failed, reverting...")
6605
      try:
6606
        _RemoveDisks(self, instance, target_node=target_node)
6607
      finally:
6608
        self.cfg.ReleaseDRBDMinors(instance.name)
6609
        raise
6610

    
6611
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6612

    
6613
    errs = []
6614
    # activate, get path, copy the data over
6615
    for idx, disk in enumerate(instance.disks):
6616
      self.LogInfo("Copying data for disk %d", idx)
6617
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6618
                                               instance.name, True, idx)
6619
      if result.fail_msg:
6620
        self.LogWarning("Can't assemble newly created disk %d: %s",
6621
                        idx, result.fail_msg)
6622
        errs.append(result.fail_msg)
6623
        break
6624
      dev_path = result.payload
6625
      result = self.rpc.call_blockdev_export(source_node, disk,
6626
                                             target_node, dev_path,
6627
                                             cluster_name)
6628
      if result.fail_msg:
6629
        self.LogWarning("Can't copy data over for disk %d: %s",
6630
                        idx, result.fail_msg)
6631
        errs.append(result.fail_msg)
6632
        break
6633

    
6634
    if errs:
6635
      self.LogWarning("Some disks failed to copy, aborting")
6636
      try:
6637
        _RemoveDisks(self, instance, target_node=target_node)
6638
      finally:
6639
        self.cfg.ReleaseDRBDMinors(instance.name)
6640
        raise errors.OpExecError("Errors during disk copy: %s" %
6641
                                 (",".join(errs),))
6642

    
6643
    instance.primary_node = target_node
6644
    self.cfg.Update(instance, feedback_fn)
6645

    
6646
    self.LogInfo("Removing the disks on the original node")
6647
    _RemoveDisks(self, instance, target_node=source_node)
6648

    
6649
    # Only start the instance if it's marked as up
6650
    if instance.admin_up:
6651
      self.LogInfo("Starting instance %s on node %s",
6652
                   instance.name, target_node)
6653

    
6654
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6655
                                           ignore_secondaries=True)
6656
      if not disks_ok:
6657
        _ShutdownInstanceDisks(self, instance)
6658
        raise errors.OpExecError("Can't activate the instance's disks")
6659

    
6660
      result = self.rpc.call_instance_start(target_node, instance,
6661
                                            None, None, False)
6662
      msg = result.fail_msg
6663
      if msg:
6664
        _ShutdownInstanceDisks(self, instance)
6665
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6666
                                 (instance.name, target_node, msg))
6667

    
6668

    
6669
class LUNodeMigrate(LogicalUnit):
6670
  """Migrate all instances from a node.
6671

6672
  """
6673
  HPATH = "node-migrate"
6674
  HTYPE = constants.HTYPE_NODE
6675
  REQ_BGL = False
6676

    
6677
  def CheckArguments(self):
6678
    pass
6679

    
6680
  def ExpandNames(self):
6681
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6682

    
6683
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6684
    self.needed_locks = {
6685
      locking.LEVEL_NODE: [self.op.node_name],
6686
      }
6687

    
6688
  def BuildHooksEnv(self):
6689
    """Build hooks env.
6690

6691
    This runs on the master, the primary and all the secondaries.
6692

6693
    """
6694
    return {
6695
      "NODE_NAME": self.op.node_name,
6696
      }
6697

    
6698
  def BuildHooksNodes(self):
6699
    """Build hooks nodes.
6700

6701
    """
6702
    nl = [self.cfg.GetMasterNode()]
6703
    return (nl, nl)
6704

    
6705
  def CheckPrereq(self):
6706
    pass
6707

    
6708
  def Exec(self, feedback_fn):
6709
    # Prepare jobs for migration instances
6710
    jobs = [
6711
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6712
                                 mode=self.op.mode,
6713
                                 live=self.op.live,
6714
                                 iallocator=self.op.iallocator,
6715
                                 target_node=self.op.target_node)]
6716
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6717
      ]
6718

    
6719
    # TODO: Run iallocator in this opcode and pass correct placement options to
6720
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6721
    # running the iallocator and the actual migration, a good consistency model
6722
    # will have to be found.
6723

    
6724
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6725
            frozenset([self.op.node_name]))
6726

    
6727
    return ResultWithJobs(jobs)
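
# Illustrative sketch (not part of the original module): ResultWithJobs wraps
# a list of jobs, each job being a list of opcodes, as built in
# LUNodeMigrate.Exec above.  The helper below assumes the remaining migrate
# parameters keep their opcode defaults; its name is hypothetical.
def _ExampleEvacuationJobs(instance_names):
  """Build one single-opcode migration job per instance (example only).

  """
  jobs = [[opcodes.OpInstanceMigrate(instance_name=name)]
          for name in instance_names]
  return ResultWithJobs(jobs)
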
6728

    
6729

    
6730
class TLMigrateInstance(Tasklet):
6731
  """Tasklet class for instance migration.
6732

6733
  @type live: boolean
6734
  @ivar live: whether the migration will be done live or non-live;
6735
      this variable is initialized only after CheckPrereq has run
6736
  @type cleanup: boolean
6737
  @ivar cleanup: Whether we clean up from a failed migration
6738
  @type iallocator: string
6739
  @ivar iallocator: The iallocator used to determine target_node
6740
  @type target_node: string
6741
  @ivar target_node: If given, the target_node to reallocate the instance to
6742
  @type failover: boolean
6743
  @ivar failover: Whether operation results in failover or migration
6744
  @type fallback: boolean
6745
  @ivar fallback: Whether fallback to failover is allowed if migration is not
6746
                  possible
6747
  @type ignore_consistency: boolean
6748
  @ivar ignore_consistency: Whether we should ignore consistency between source
6749
                            and target node
6750
  @type shutdown_timeout: int
6751
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6752

6753
  """
6754
  def __init__(self, lu, instance_name, cleanup=False,
6755
               failover=False, fallback=False,
6756
               ignore_consistency=False,
6757
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6758
    """Initializes this class.
6759

6760
    """
6761
    Tasklet.__init__(self, lu)
6762

    
6763
    # Parameters
6764
    self.instance_name = instance_name
6765
    self.cleanup = cleanup
6766
    self.live = False # will be overridden later
6767
    self.failover = failover
6768
    self.fallback = fallback
6769
    self.ignore_consistency = ignore_consistency
6770
    self.shutdown_timeout = shutdown_timeout
6771

    
6772
  def CheckPrereq(self):
6773
    """Check prerequisites.
6774

6775
    This checks that the instance is in the cluster.
6776

6777
    """
6778
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6779
    instance = self.cfg.GetInstanceInfo(instance_name)
6780
    assert instance is not None
6781
    self.instance = instance
6782

    
6783
    if (not self.cleanup and not instance.admin_up and not self.failover and
6784
        self.fallback):
6785
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6786
                      " to failover")
6787
      self.failover = True
6788

    
6789
    if instance.disk_template not in constants.DTS_MIRRORED:
6790
      if self.failover:
6791
        text = "failovers"
6792
      else:
6793
        text = "migrations"
6794
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6795
                                 " %s" % (instance.disk_template, text),
6796
                                 errors.ECODE_STATE)
6797

    
6798
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6799
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6800

    
6801
      if self.lu.op.iallocator:
6802
        self._RunAllocator()
6803
      else:
6804
        # We set self.target_node as it is required by
6805
        # BuildHooksEnv
6806
        self.target_node = self.lu.op.target_node
6807

    
6808
      # self.target_node is already populated, either directly or by the
6809
      # iallocator run
6810
      target_node = self.target_node
6811
      if self.target_node == instance.primary_node:
6812
        raise errors.OpPrereqError("Cannot migrate instance %s"
6813
                                   " to its primary (%s)" %
6814
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_INVAL)
6815

    
6816
      if len(self.lu.tasklets) == 1:
6817
        # It is safe to release locks only when we're the only tasklet
6818
        # in the LU
6819
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6820
                      keep=[instance.primary_node, self.target_node])
6821

    
6822
    else:
6823
      secondary_nodes = instance.secondary_nodes
6824
      if not secondary_nodes:
6825
        raise errors.ConfigurationError("No secondary node but using"
6826
                                        " %s disk template" %
6827
                                        instance.disk_template)
6828
      target_node = secondary_nodes[0]
6829
      if self.lu.op.iallocator or (self.lu.op.target_node and
6830
                                   self.lu.op.target_node != target_node):
6831
        if self.failover:
6832
          text = "failed over"
6833
        else:
6834
          text = "migrated"
6835
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6836
                                   " be %s to arbitrary nodes"
6837
                                   " (neither an iallocator nor a target"
6838
                                   " node can be passed)" %
6839
                                   (instance.disk_template, text),
6840
                                   errors.ECODE_INVAL)
6841

    
6842
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6843

    
6844
    # check memory requirements on the target node
6845
    if not self.failover or instance.admin_up:
6846
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6847
                           instance.name, i_be[constants.BE_MEMORY],
6848
                           instance.hypervisor)
6849
    else:
6850
      self.lu.LogInfo("Not checking memory on the secondary node as"
6851
                      " instance will not be started")
6852

    
6853
    # check bridge existence
6854
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6855

    
6856
    if not self.cleanup:
6857
      _CheckNodeNotDrained(self.lu, target_node)
6858
      if not self.failover:
6859
        result = self.rpc.call_instance_migratable(instance.primary_node,
6860
                                                   instance)
6861
        if result.fail_msg and self.fallback:
6862
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6863
                          " failover")
6864
          self.failover = True
6865
        else:
6866
          result.Raise("Can't migrate, please use failover",
6867
                       prereq=True, ecode=errors.ECODE_STATE)
6868

    
6869
    assert not (self.failover and self.cleanup)
6870

    
6871
    if not self.failover:
6872
      if self.lu.op.live is not None and self.lu.op.mode is not None:
6873
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6874
                                   " parameters are accepted",
6875
                                   errors.ECODE_INVAL)
6876
      if self.lu.op.live is not None:
6877
        if self.lu.op.live:
6878
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
6879
        else:
6880
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6881
        # reset the 'live' parameter to None so that repeated
6882
        # invocations of CheckPrereq do not raise an exception
6883
        self.lu.op.live = None
6884
      elif self.lu.op.mode is None:
6885
        # read the default value from the hypervisor
6886
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6887
                                                skip_globals=False)
6888
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6889

    
6890
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6891
    else:
6892
      # Failover is never live
6893
      self.live = False
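
  # Illustrative summary (not part of the original module) of the mode
  # resolution above, with hypothetical parameter combinations:
  #
  #   live=True,  mode=None  -> constants.HT_MIGRATION_LIVE
  #   live=False, mode=None  -> constants.HT_MIGRATION_NONLIVE
  #   live=None,  mode=None  -> hypervisor's HV_MIGRATION_MODE default
  #   live set,   mode set   -> OpPrereqError (mutually exclusive)
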
6894

    
6895
  def _RunAllocator(self):
6896
    """Run the allocator based on input opcode.
6897

6898
    """
6899
    ial = IAllocator(self.cfg, self.rpc,
6900
                     mode=constants.IALLOCATOR_MODE_RELOC,
6901
                     name=self.instance_name,
6902
                     # TODO See why hail breaks with a single node below
6903
                     relocate_from=[self.instance.primary_node,
6904
                                    self.instance.primary_node],
6905
                     )
6906

    
6907
    ial.Run(self.lu.op.iallocator)
6908

    
6909
    if not ial.success:
6910
      raise errors.OpPrereqError("Can't compute nodes using"
6911
                                 " iallocator '%s': %s" %
6912
                                 (self.lu.op.iallocator, ial.info),
6913
                                 errors.ECODE_NORES)
6914
    if len(ial.result) != ial.required_nodes:
6915
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6916
                                 " of nodes (%s), required %s" %
6917
                                 (self.lu.op.iallocator, len(ial.result),
6918
                                  ial.required_nodes), errors.ECODE_FAULT)
6919
    self.target_node = ial.result[0]
6920
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6921
                 self.instance_name, self.lu.op.iallocator,
6922
                 utils.CommaJoin(ial.result))
6923

    
6924
  def _WaitUntilSync(self):
6925
    """Poll with custom rpc for disk sync.
6926

6927
    This uses our own step-based rpc call.
6928

6929
    """
6930
    self.feedback_fn("* wait until resync is done")
6931
    all_done = False
6932
    while not all_done:
6933
      all_done = True
6934
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6935
                                            self.nodes_ip,
6936
                                            self.instance.disks)
6937
      min_percent = 100
6938
      for node, nres in result.items():
6939
        nres.Raise("Cannot resync disks on node %s" % node)
6940
        node_done, node_percent = nres.payload
6941
        all_done = all_done and node_done
6942
        if node_percent is not None:
6943
          min_percent = min(min_percent, node_percent)
6944
      if not all_done:
6945
        if min_percent < 100:
6946
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6947
        time.sleep(2)
6948

    
6949
  def _EnsureSecondary(self, node):
6950
    """Demote a node to secondary.
6951

6952
    """
6953
    self.feedback_fn("* switching node %s to secondary mode" % node)
6954

    
6955
    for dev in self.instance.disks:
6956
      self.cfg.SetDiskID(dev, node)
6957

    
6958
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6959
                                          self.instance.disks)
6960
    result.Raise("Cannot change disk to secondary on node %s" % node)
6961

    
6962
  def _GoStandalone(self):
6963
    """Disconnect from the network.
6964

6965
    """
6966
    self.feedback_fn("* changing into standalone mode")
6967
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6968
                                               self.instance.disks)
6969
    for node, nres in result.items():
6970
      nres.Raise("Cannot disconnect disks node %s" % node)
6971

    
6972
  def _GoReconnect(self, multimaster):
6973
    """Reconnect to the network.
6974

6975
    """
6976
    if multimaster:
6977
      msg = "dual-master"
6978
    else:
6979
      msg = "single-master"
6980
    self.feedback_fn("* changing disks into %s mode" % msg)
6981
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6982
                                           self.instance.disks,
6983
                                           self.instance.name, multimaster)
6984
    for node, nres in result.items():
6985
      nres.Raise("Cannot change disks config on node %s" % node)
6986

    
6987
  def _ExecCleanup(self):
6988
    """Try to cleanup after a failed migration.
6989

6990
    The cleanup is done by:
6991
      - check that the instance is running only on one node
6992
        (and update the config if needed)
6993
      - change disks on its secondary node to secondary
6994
      - wait until disks are fully synchronized
6995
      - disconnect from the network
6996
      - change disks into single-master mode
6997
      - wait again until disks are fully synchronized
6998

6999
    """
7000
    instance = self.instance
7001
    target_node = self.target_node
7002
    source_node = self.source_node
7003

    
7004
    # check running on only one node
7005
    self.feedback_fn("* checking where the instance actually runs"
7006
                     " (if this hangs, the hypervisor might be in"
7007
                     " a bad state)")
7008
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7009
    for node, result in ins_l.items():
7010
      result.Raise("Can't contact node %s" % node)
7011

    
7012
    runningon_source = instance.name in ins_l[source_node].payload
7013
    runningon_target = instance.name in ins_l[target_node].payload
7014

    
7015
    if runningon_source and runningon_target:
7016
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7017
                               " or the hypervisor is confused; you will have"
7018
                               " to ensure manually that it runs only on one"
7019
                               " and restart this operation")
7020

    
7021
    if not (runningon_source or runningon_target):
7022
      raise errors.OpExecError("Instance does not seem to be running at all;"
7023
                               " in this case it's safer to repair by"
7024
                               " running 'gnt-instance stop' to ensure disk"
7025
                               " shutdown, and then restarting it")
7026

    
7027
    if runningon_target:
7028
      # the migration has actually succeeded, we need to update the config
7029
      self.feedback_fn("* instance running on secondary node (%s),"
7030
                       " updating config" % target_node)
7031
      instance.primary_node = target_node
7032
      self.cfg.Update(instance, self.feedback_fn)
7033
      demoted_node = source_node
7034
    else:
7035
      self.feedback_fn("* instance confirmed to be running on its"
7036
                       " primary node (%s)" % source_node)
7037
      demoted_node = target_node
7038

    
7039
    if instance.disk_template in constants.DTS_INT_MIRROR:
7040
      self._EnsureSecondary(demoted_node)
7041
      try:
7042
        self._WaitUntilSync()
7043
      except errors.OpExecError:
7044
        # we ignore errors here, since if the device is standalone, it
7045
        # won't be able to sync
7046
        pass
7047
      self._GoStandalone()
7048
      self._GoReconnect(False)
7049
      self._WaitUntilSync()
7050

    
7051
    self.feedback_fn("* done")
7052

    
7053
  def _RevertDiskStatus(self):
7054
    """Try to revert the disk status after a failed migration.
7055

7056
    """
7057
    target_node = self.target_node
7058
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7059
      return
7060

    
7061
    try:
7062
      self._EnsureSecondary(target_node)
7063
      self._GoStandalone()
7064
      self._GoReconnect(False)
7065
      self._WaitUntilSync()
7066
    except errors.OpExecError, err:
7067
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7068
                         " please try to recover the instance manually;"
7069
                         " error '%s'" % str(err))
7070

    
7071
  def _AbortMigration(self):
7072
    """Call the hypervisor code to abort a started migration.
7073

7074
    """
7075
    instance = self.instance
7076
    target_node = self.target_node
7077
    migration_info = self.migration_info
7078

    
7079
    abort_result = self.rpc.call_finalize_migration(target_node,
7080
                                                    instance,
7081
                                                    migration_info,
7082
                                                    False)
7083
    abort_msg = abort_result.fail_msg
7084
    if abort_msg:
7085
      logging.error("Aborting migration failed on target node %s: %s",
7086
                    target_node, abort_msg)
7087
      # Don't raise an exception here, as we still have to try to revert the
7088
      # disk status, even if this step failed.
7089

    
7090
  def _ExecMigration(self):
7091
    """Migrate an instance.
7092

7093
    The migration is done by:
7094
      - change the disks into dual-master mode
7095
      - wait until disks are fully synchronized again
7096
      - migrate the instance
7097
      - change disks on the new secondary node (the old primary) to secondary
7098
      - wait until disks are fully synchronized
7099
      - change disks into single-master mode
7100

7101
    """
7102
    instance = self.instance
7103
    target_node = self.target_node
7104
    source_node = self.source_node
7105

    
7106
    self.feedback_fn("* checking disk consistency between source and target")
7107
    for dev in instance.disks:
7108
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7109
        raise errors.OpExecError("Disk %s is degraded or not fully"
7110
                                 " synchronized on target node,"
7111
                                 " aborting migration" % dev.iv_name)
7112

    
7113
    # First get the migration information from the remote node
7114
    result = self.rpc.call_migration_info(source_node, instance)
7115
    msg = result.fail_msg
7116
    if msg:
7117
      log_err = ("Failed fetching source migration information from %s: %s" %
7118
                 (source_node, msg))
7119
      logging.error(log_err)
7120
      raise errors.OpExecError(log_err)
7121

    
7122
    self.migration_info = migration_info = result.payload
7123

    
7124
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7125
      # Then switch the disks to master/master mode
7126
      self._EnsureSecondary(target_node)
7127
      self._GoStandalone()
7128
      self._GoReconnect(True)
7129
      self._WaitUntilSync()
7130

    
7131
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7132
    result = self.rpc.call_accept_instance(target_node,
7133
                                           instance,
7134
                                           migration_info,
7135
                                           self.nodes_ip[target_node])
7136

    
7137
    msg = result.fail_msg
7138
    if msg:
7139
      logging.error("Instance pre-migration failed, trying to revert"
7140
                    " disk status: %s", msg)
7141
      self.feedback_fn("Pre-migration failed, aborting")
7142
      self._AbortMigration()
7143
      self._RevertDiskStatus()
7144
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7145
                               (instance.name, msg))
7146

    
7147
    self.feedback_fn("* migrating instance to %s" % target_node)
7148
    result = self.rpc.call_instance_migrate(source_node, instance,
7149
                                            self.nodes_ip[target_node],
7150
                                            self.live)
7151
    msg = result.fail_msg
7152
    if msg:
7153
      logging.error("Instance migration failed, trying to revert"
7154
                    " disk status: %s", msg)
7155
      self.feedback_fn("Migration failed, aborting")
7156
      self._AbortMigration()
7157
      self._RevertDiskStatus()
7158
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7159
                               (instance.name, msg))
7160

    
7161
    instance.primary_node = target_node
7162
    # distribute new instance config to the other nodes
7163
    self.cfg.Update(instance, self.feedback_fn)
7164

    
7165
    result = self.rpc.call_finalize_migration(target_node,
7166
                                              instance,
7167
                                              migration_info,
7168
                                              True)
7169
    msg = result.fail_msg
7170
    if msg:
7171
      logging.error("Instance migration succeeded, but finalization failed:"
7172
                    " %s", msg)
7173
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7174
                               msg)
7175

    
7176
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7177
      self._EnsureSecondary(source_node)
7178
      self._WaitUntilSync()
7179
      self._GoStandalone()
7180
      self._GoReconnect(False)
7181
      self._WaitUntilSync()
7182

    
7183
    self.feedback_fn("* done")
7184

    
7185
  def _ExecFailover(self):
7186
    """Failover an instance.
7187

7188
    The failover is done by shutting it down on its present node and
7189
    starting it on the secondary.
7190

7191
    """
7192
    instance = self.instance
7193
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7194

    
7195
    source_node = instance.primary_node
7196
    target_node = self.target_node
7197

    
7198
    if instance.admin_up:
7199
      self.feedback_fn("* checking disk consistency between source and target")
7200
      for dev in instance.disks:
7201
        # for drbd, these are drbd over lvm
7202
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7203
          if primary_node.offline:
7204
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7205
                             " target node %s" %
7206
                             (primary_node.name, dev.iv_name, target_node))
7207
          elif not self.ignore_consistency:
7208
            raise errors.OpExecError("Disk %s is degraded on target node,"
7209
                                     " aborting failover" % dev.iv_name)
7210
    else:
7211
      self.feedback_fn("* not checking disk consistency as instance is not"
7212
                       " running")
7213

    
7214
    self.feedback_fn("* shutting down instance on source node")
7215
    logging.info("Shutting down instance %s on node %s",
7216
                 instance.name, source_node)
7217

    
7218
    result = self.rpc.call_instance_shutdown(source_node, instance,
7219
                                             self.shutdown_timeout)
7220
    msg = result.fail_msg
7221
    if msg:
7222
      if self.ignore_consistency or primary_node.offline:
7223
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7224
                           " proceeding anyway; please make sure node"
7225
                           " %s is down; error details: %s",
7226
                           instance.name, source_node, source_node, msg)
7227
      else:
7228
        raise errors.OpExecError("Could not shutdown instance %s on"
7229
                                 " node %s: %s" %
7230
                                 (instance.name, source_node, msg))
7231

    
7232
    self.feedback_fn("* deactivating the instance's disks on source node")
7233
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7234
      raise errors.OpExecError("Can't shut down the instance's disks")
7235

    
7236
    instance.primary_node = target_node
7237
    # distribute new instance config to the other nodes
7238
    self.cfg.Update(instance, self.feedback_fn)
7239

    
7240
    # Only start the instance if it's marked as up
7241
    if instance.admin_up:
7242
      self.feedback_fn("* activating the instance's disks on target node %s" %
7243
                       target_node)
7244
      logging.info("Starting instance %s on node %s",
7245
                   instance.name, target_node)
7246

    
7247
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7248
                                           ignore_secondaries=True)
7249
      if not disks_ok:
7250
        _ShutdownInstanceDisks(self.lu, instance)
7251
        raise errors.OpExecError("Can't activate the instance's disks")
7252

    
7253
      self.feedback_fn("* starting the instance on the target node %s" %
7254
                       target_node)
7255
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7256
                                            False)
7257
      msg = result.fail_msg
7258
      if msg:
7259
        _ShutdownInstanceDisks(self.lu, instance)
7260
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7261
                                 (instance.name, target_node, msg))
7262

    
7263
  def Exec(self, feedback_fn):
7264
    """Perform the migration.
7265

7266
    """
7267
    self.feedback_fn = feedback_fn
7268
    self.source_node = self.instance.primary_node
7269

    
7270
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7271
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7272
      self.target_node = self.instance.secondary_nodes[0]
7273
      # Otherwise self.target_node has been populated either
7274
      # directly, or through an iallocator.
7275

    
7276
    self.all_nodes = [self.source_node, self.target_node]
7277
    self.nodes_ip = {
7278
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
7279
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
7280
      }
7281

    
7282
    if self.failover:
7283
      feedback_fn("Failover instance %s" % self.instance.name)
7284
      self._ExecFailover()
7285
    else:
7286
      feedback_fn("Migrating instance %s" % self.instance.name)
7287

    
7288
      if self.cleanup:
7289
        return self._ExecCleanup()
7290
      else:
7291
        return self._ExecMigration()
7292

    
7293

    
7294
def _CreateBlockDev(lu, node, instance, device, force_create,
7295
                    info, force_open):
7296
  """Create a tree of block devices on a given node.
7297

7298
  If this device type has to be created on secondaries, create it and
7299
  all its children.
7300

7301
  If not, just recurse to children keeping the same 'force' value.
7302

7303
  @param lu: the lu on whose behalf we execute
7304
  @param node: the node on which to create the device
7305
  @type instance: L{objects.Instance}
7306
  @param instance: the instance which owns the device
7307
  @type device: L{objects.Disk}
7308
  @param device: the device to create
7309
  @type force_create: boolean
7310
  @param force_create: whether to force creation of this device; this
7311
      will be changed to True whenever we find a device which has
7312
      CreateOnSecondary() attribute
7313
  @param info: the extra 'metadata' we should attach to the device
7314
      (this will be represented as a LVM tag)
7315
  @type force_open: boolean
7316
  @param force_open: this parameter will be passed to the
7317
      L{backend.BlockdevCreate} function where it specifies
7318
      whether we run on primary or not, and it affects both
7319
      the child assembly and the device's own Open() execution
7320

7321
  """
7322
  if device.CreateOnSecondary():
7323
    force_create = True
7324

    
7325
  if device.children:
7326
    for child in device.children:
7327
      _CreateBlockDev(lu, node, instance, child, force_create,
7328
                      info, force_open)
7329

    
7330
  if not force_create:
7331
    return
7332

    
7333
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7334

    
7335

    
7336
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7337
  """Create a single block device on a given node.
7338

7339
  This will not recurse over children of the device, so they must be
7340
  created in advance.
7341

7342
  @param lu: the lu on whose behalf we execute
7343
  @param node: the node on which to create the device
7344
  @type instance: L{objects.Instance}
7345
  @param instance: the instance which owns the device
7346
  @type device: L{objects.Disk}
7347
  @param device: the device to create
7348
  @param info: the extra 'metadata' we should attach to the device
7349
      (this will be represented as a LVM tag)
7350
  @type force_open: boolean
7351
  @param force_open: this parameter will be passes to the
7352
      L{backend.BlockdevCreate} function where it specifies
7353
      whether we run on primary or not, and it affects both
7354
      the child assembly and the device own Open() execution
7355

7356
  """
7357
  lu.cfg.SetDiskID(device, node)
7358
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7359
                                       instance.name, force_open, info)
7360
  result.Raise("Can't create block device %s on"
7361
               " node %s for instance %s" % (device, node, instance.name))
7362
  if device.physical_id is None:
7363
    device.physical_id = result.payload
7364

    
7365

    
7366
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
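# Illustration (hypothetical values): a call such as
#   _GenerateUniqueNames(lu, [".disk0_data", ".disk0_meta"])
# returns one freshly generated unique ID per requested extension, with the
# extension appended verbatim, e.g. ["<uuid>.disk0_data", "<uuid>.disk0_meta"];
# the exact ID format is whatever lu.cfg.GenerateUniqueID produces.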
7377

    
7378

    
7379
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7380
                         iv_name, p_minor, s_minor):
7381
  """Generate a drbd8 device complete with its children.
7382

7383
  """
7384
  assert len(vgnames) == len(names) == 2
7385
  port = lu.cfg.AllocatePort()
7386
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7387
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7388
                          logical_id=(vgnames[0], names[0]))
7389
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7390
                          logical_id=(vgnames[1], names[1]))
7391
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7392
                          logical_id=(primary, secondary, port,
7393
                                      p_minor, s_minor,
7394
                                      shared_secret),
7395
                          children=[dev_data, dev_meta],
7396
                          iv_name=iv_name)
7397
  return drbd_dev
7398

    
7399

    
7400
def _GenerateDiskTemplate(lu, template_name,
7401
                          instance_name, primary_node,
7402
                          secondary_nodes, disk_info,
7403
                          file_storage_dir, file_driver,
7404
                          base_index, feedback_fn):
7405
  """Generate the entire disk layout for a given template type.
7406

7407
  """
7408
  #TODO: compute space requirements
7409

    
7410
  vgname = lu.cfg.GetVGName()
7411
  disk_count = len(disk_info)
7412
  disks = []
7413
  if template_name == constants.DT_DISKLESS:
7414
    pass
7415
  elif template_name == constants.DT_PLAIN:
7416
    if len(secondary_nodes) != 0:
7417
      raise errors.ProgrammerError("Wrong template configuration")
7418

    
7419
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7420
                                      for i in range(disk_count)])
7421
    for idx, disk in enumerate(disk_info):
7422
      disk_index = idx + base_index
7423
      vg = disk.get(constants.IDISK_VG, vgname)
7424
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7425
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7426
                              size=disk[constants.IDISK_SIZE],
7427
                              logical_id=(vg, names[idx]),
7428
                              iv_name="disk/%d" % disk_index,
7429
                              mode=disk[constants.IDISK_MODE])
7430
      disks.append(disk_dev)
7431
  elif template_name == constants.DT_DRBD8:
7432
    if len(secondary_nodes) != 1:
7433
      raise errors.ProgrammerError("Wrong template configuration")
7434
    remote_node = secondary_nodes[0]
7435
    minors = lu.cfg.AllocateDRBDMinor(
7436
      [primary_node, remote_node] * len(disk_info), instance_name)
7437

    
7438
    names = []
7439
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7440
                                               for i in range(disk_count)]):
7441
      names.append(lv_prefix + "_data")
7442
      names.append(lv_prefix + "_meta")
7443
    for idx, disk in enumerate(disk_info):
7444
      disk_index = idx + base_index
7445
      data_vg = disk.get(constants.IDISK_VG, vgname)
7446
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7447
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7448
                                      disk[constants.IDISK_SIZE],
7449
                                      [data_vg, meta_vg],
7450
                                      names[idx * 2:idx * 2 + 2],
7451
                                      "disk/%d" % disk_index,
7452
                                      minors[idx * 2], minors[idx * 2 + 1])
7453
      disk_dev.mode = disk[constants.IDISK_MODE]
7454
      disks.append(disk_dev)
7455
  elif template_name == constants.DT_FILE:
7456
    if len(secondary_nodes) != 0:
7457
      raise errors.ProgrammerError("Wrong template configuration")
7458

    
7459
    opcodes.RequireFileStorage()
7460

    
7461
    for idx, disk in enumerate(disk_info):
7462
      disk_index = idx + base_index
7463
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7464
                              size=disk[constants.IDISK_SIZE],
7465
                              iv_name="disk/%d" % disk_index,
7466
                              logical_id=(file_driver,
7467
                                          "%s/disk%d" % (file_storage_dir,
7468
                                                         disk_index)),
7469
                              mode=disk[constants.IDISK_MODE])
7470
      disks.append(disk_dev)
7471
  elif template_name == constants.DT_SHARED_FILE:
7472
    if len(secondary_nodes) != 0:
7473
      raise errors.ProgrammerError("Wrong template configuration")
7474

    
7475
    opcodes.RequireSharedFileStorage()
7476

    
7477
    for idx, disk in enumerate(disk_info):
7478
      disk_index = idx + base_index
7479
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7480
                              size=disk[constants.IDISK_SIZE],
7481
                              iv_name="disk/%d" % disk_index,
7482
                              logical_id=(file_driver,
7483
                                          "%s/disk%d" % (file_storage_dir,
7484
                                                         disk_index)),
7485
                              mode=disk[constants.IDISK_MODE])
7486
      disks.append(disk_dev)
7487
  elif template_name == constants.DT_BLOCK:
7488
    if len(secondary_nodes) != 0:
7489
      raise errors.ProgrammerError("Wrong template configuration")
7490

    
7491
    for idx, disk in enumerate(disk_info):
7492
      disk_index = idx + base_index
7493
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7494
                              size=disk[constants.IDISK_SIZE],
7495
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7496
                                          disk[constants.IDISK_ADOPT]),
7497
                              iv_name="disk/%d" % disk_index,
7498
                              mode=disk[constants.IDISK_MODE])
7499
      disks.append(disk_dev)
7500

    
7501
  else:
7502
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7503
  return disks
7504

    
7505

    
7506
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7511

    
7512

    
7513
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
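# Worked example (hypothetical numbers):
#   >>> _CalcEta(30, 1024, 4096)
#   90.0
# i.e. the average rate is 30 / 1024.0 seconds per unit, so the remaining
# 3072 units take roughly 90 more seconds.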
7524

    
7525

    
7526
def _WipeDisks(lu, instance):
7527
  """Wipes instance disks.
7528

7529
  @type lu: L{LogicalUnit}
7530
  @param lu: the logical unit on whose behalf we execute
7531
  @type instance: L{objects.Instance}
7532
  @param instance: the instance whose disks we should wipe
7533
  @return: the success of the wipe
7534

7535
  """
7536
  node = instance.primary_node
7537

    
7538
  for device in instance.disks:
7539
    lu.cfg.SetDiskID(device, node)
7540

    
7541
  logging.info("Pause sync of instance %s disks", instance.name)
7542
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7543

    
7544
  for idx, success in enumerate(result.payload):
7545
    if not success:
7546
      logging.warn("pause-sync of instance %s for disks %d failed",
7547
                   instance.name, idx)
7548

    
7549
  try:
7550
    for idx, device in enumerate(instance.disks):
7551
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7552
      # MAX_WIPE_CHUNK at max
7553
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7554
                            constants.MIN_WIPE_CHUNK_PERCENT)
7555
      # we _must_ make this an int, otherwise rounding errors will
7556
      # occur
7557
      wipe_chunk_size = int(wipe_chunk_size)
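      # Sketch of the chunk-size arithmetic (hypothetical constant values):
      # for a 102400 MiB disk with MIN_WIPE_CHUNK_PERCENT == 10 and
      # MAX_WIPE_CHUNK == 1024, the candidate is 102400 / 100.0 * 10 == 10240,
      # which min() then caps at 1024 MiB per wipe call.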
7558

    
7559
      lu.LogInfo("* Wiping disk %d", idx)
7560
      logging.info("Wiping disk %d for instance %s, node %s using"
7561
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7562

    
7563
      offset = 0
7564
      size = device.size
7565
      last_output = 0
7566
      start_time = time.time()
7567

    
7568
      while offset < size:
7569
        wipe_size = min(wipe_chunk_size, size - offset)
7570
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7571
                      idx, offset, wipe_size)
7572
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7573
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7574
                     (idx, offset, wipe_size))
7575
        now = time.time()
7576
        offset += wipe_size
7577
        if now - last_output >= 60:
7578
          eta = _CalcEta(now - start_time, offset, size)
7579
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7580
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7581
          last_output = now
7582
  finally:
7583
    logging.info("Resume sync of instance %s disks", instance.name)
7584

    
7585
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7586

    
7587
    for idx, success in enumerate(result.payload):
7588
      if not success:
7589
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7590
                      " look at the status and troubleshoot the issue", idx)
7591
        logging.warn("resume-sync of instance %s for disks %d failed",
7592
                     instance.name, idx)
7593

    
7594

    
7595
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7596
  """Create all disks for an instance.
7597

7598
  This abstracts away some work from AddInstance.
7599

7600
  @type lu: L{LogicalUnit}
7601
  @param lu: the logical unit on whose behalf we execute
7602
  @type instance: L{objects.Instance}
7603
  @param instance: the instance whose disks we should create
7604
  @type to_skip: list
7605
  @param to_skip: list of indices to skip
7606
  @type target_node: string
7607
  @param target_node: if passed, overrides the target node for creation
7608
  @rtype: boolean
7609
  @return: the success of the creation
7610

7611
  """
7612
  info = _GetInstanceInfoText(instance)
7613
  if target_node is None:
7614
    pnode = instance.primary_node
7615
    all_nodes = instance.all_nodes
7616
  else:
7617
    pnode = target_node
7618
    all_nodes = [pnode]
7619

    
7620
  if instance.disk_template in constants.DTS_FILEBASED:
7621
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7622
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7623

    
7624
    result.Raise("Failed to create directory '%s' on"
7625
                 " node %s" % (file_storage_dir, pnode))
7626

    
7627
  # Note: this needs to be kept in sync with adding of disks in
7628
  # LUInstanceSetParams
7629
  for idx, device in enumerate(instance.disks):
7630
    if to_skip and idx in to_skip:
7631
      continue
7632
    logging.info("Creating volume %s for instance %s",
7633
                 device.iv_name, instance.name)
7634
    #HARDCODE
7635
    for node in all_nodes:
7636
      f_create = node == pnode
7637
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7638

    
7639

    
7640
def _RemoveDisks(lu, instance, target_node=None):
7641
  """Remove all disks for an instance.
7642

7643
  This abstracts away some work from `AddInstance()` and
7644
  `RemoveInstance()`. Note that in case some of the devices couldn't
7645
  be removed, the removal will continue with the other ones (compare
7646
  with `_CreateDisks()`).
7647

7648
  @type lu: L{LogicalUnit}
7649
  @param lu: the logical unit on whose behalf we execute
7650
  @type instance: L{objects.Instance}
7651
  @param instance: the instance whose disks we should remove
7652
  @type target_node: string
7653
  @param target_node: used to override the node on which to remove the disks
7654
  @rtype: boolean
7655
  @return: the success of the removal
7656

7657
  """
7658
  logging.info("Removing block devices for instance %s", instance.name)
7659

    
7660
  all_result = True
7661
  for device in instance.disks:
7662
    if target_node:
7663
      edata = [(target_node, device)]
7664
    else:
7665
      edata = device.ComputeNodeTree(instance.primary_node)
7666
    for node, disk in edata:
7667
      lu.cfg.SetDiskID(disk, node)
7668
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7669
      if msg:
7670
        lu.LogWarning("Could not remove block device %s on node %s,"
7671
                      " continuing anyway: %s", device.iv_name, node, msg)
7672
        all_result = False
7673

    
7674
  if instance.disk_template == constants.DT_FILE:
7675
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7676
    if target_node:
7677
      tgt = target_node
7678
    else:
7679
      tgt = instance.primary_node
7680
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7681
    if result.fail_msg:
7682
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7683
                    file_storage_dir, instance.primary_node, result.fail_msg)
7684
      all_result = False
7685

    
7686
  return all_result
7687

    
7688

    
7689
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
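# Illustration (hypothetical input, assuming the per-VG accumulation above):
#   >>> disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#   ...          {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 512}]
#   >>> _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)
#   {'xenvg': 1792}
# i.e. 128 MB of DRBD metadata is added per disk on top of the LV sizes.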
7719

    
7720

    
7721
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
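# Illustration (hypothetical input):
#   >>> disks = [{constants.IDISK_SIZE: 1024}, {constants.IDISK_SIZE: 512}]
#   >>> _ComputeDiskSize(constants.DT_PLAIN, disks)
#   1536
#   >>> _ComputeDiskSize(constants.DT_DRBD8, disks)
#   1792
# Templates that do not consume volume group space map to None or 0.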
7741

    
7742

    
7743
def _FilterVmNodes(lu, nodenames):
7744
  """Filters out non-vm_capable nodes from a list.
7745

7746
  @type lu: L{LogicalUnit}
7747
  @param lu: the logical unit for which we check
7748
  @type nodenames: list
7749
  @param nodenames: the list of nodes on which we should check
7750
  @rtype: list
7751
  @return: the list of vm-capable nodes
7752

7753
  """
7754
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7755
  return [name for name in nodenames if name not in vm_nodes]
7756

    
7757

    
7758
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7759
  """Hypervisor parameter validation.
7760

7761
  This function abstract the hypervisor parameter validation to be
7762
  used in both instance create and instance modify.
7763

7764
  @type lu: L{LogicalUnit}
7765
  @param lu: the logical unit for which we check
7766
  @type nodenames: list
7767
  @param nodenames: the list of nodes on which we should check
7768
  @type hvname: string
7769
  @param hvname: the name of the hypervisor we should use
7770
  @type hvparams: dict
7771
  @param hvparams: the parameters which we need to check
7772
  @raise errors.OpPrereqError: if the parameters are not valid
7773

7774
  """
7775
  nodenames = _FilterVmNodes(lu, nodenames)
7776
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7777
                                                  hvname,
7778
                                                  hvparams)
7779
  for node in nodenames:
7780
    info = hvinfo[node]
7781
    if info.offline:
7782
      continue
7783
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7784

    
7785

    
7786
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7787
  """OS parameters validation.
7788

7789
  @type lu: L{LogicalUnit}
7790
  @param lu: the logical unit for which we check
7791
  @type required: boolean
7792
  @param required: whether the validation should fail if the OS is not
7793
      found
7794
  @type nodenames: list
7795
  @param nodenames: the list of nodes on which we should check
7796
  @type osname: string
7797
  @param osname: the name of the OS we should use
7798
  @type osparams: dict
7799
  @param osparams: the parameters which we need to check
7800
  @raise errors.OpPrereqError: if the parameters are not valid
7801

7802
  """
7803
  nodenames = _FilterVmNodes(lu, nodenames)
7804
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7805
                                   [constants.OS_VALIDATE_PARAMETERS],
7806
                                   osparams)
7807
  for node, nres in result.items():
7808
    # we don't check for offline cases since this should be run only
7809
    # against the master node and/or an instance's nodes
7810
    nres.Raise("OS Parameters validation failed on node %s" % node)
7811
    if not nres.payload:
7812
      lu.LogInfo("OS %s not found on node %s, validation skipped",
7813
                 osname, node)
7814

    
7815

    
7816
class LUInstanceCreate(LogicalUnit):
7817
  """Create an instance.
7818

7819
  """
7820
  HPATH = "instance-add"
7821
  HTYPE = constants.HTYPE_INSTANCE
7822
  REQ_BGL = False
7823

    
7824
  def CheckArguments(self):
7825
    """Check arguments.
7826

7827
    """
7828
    # do not require name_check to ease forward/backward compatibility
7829
    # for tools
7830
    if self.op.no_install and self.op.start:
7831
      self.LogInfo("No-installation mode selected, disabling startup")
7832
      self.op.start = False
7833
    # validate/normalize the instance name
7834
    self.op.instance_name = \
7835
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7836

    
7837
    if self.op.ip_check and not self.op.name_check:
7838
      # TODO: make the ip check more flexible and not depend on the name check
7839
      raise errors.OpPrereqError("Cannot do IP address check without a name"
7840
                                 " check", errors.ECODE_INVAL)
7841

    
7842
    # check nics' parameter names
7843
    for nic in self.op.nics:
7844
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7845

    
7846
    # check disks. parameter names and consistent adopt/no-adopt strategy
7847
    has_adopt = has_no_adopt = False
7848
    for disk in self.op.disks:
7849
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7850
      if constants.IDISK_ADOPT in disk:
7851
        has_adopt = True
7852
      else:
7853
        has_no_adopt = True
7854
    if has_adopt and has_no_adopt:
7855
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7856
                                 errors.ECODE_INVAL)
7857
    if has_adopt:
7858
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7859
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7860
                                   " '%s' disk template" %
7861
                                   self.op.disk_template,
7862
                                   errors.ECODE_INVAL)
7863
      if self.op.iallocator is not None:
7864
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7865
                                   " iallocator script", errors.ECODE_INVAL)
7866
      if self.op.mode == constants.INSTANCE_IMPORT:
7867
        raise errors.OpPrereqError("Disk adoption not allowed for"
7868
                                   " instance import", errors.ECODE_INVAL)
7869
    else:
7870
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7871
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7872
                                   " but no 'adopt' parameter given" %
7873
                                   self.op.disk_template,
7874
                                   errors.ECODE_INVAL)
7875

    
7876
    self.adopt_disks = has_adopt
7877

    
7878
    # instance name verification
7879
    if self.op.name_check:
7880
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7881
      self.op.instance_name = self.hostname1.name
7882
      # used in CheckPrereq for ip ping check
7883
      self.check_ip = self.hostname1.ip
7884
    else:
7885
      self.check_ip = None
7886

    
7887
    # file storage checks
7888
    if (self.op.file_driver and
7889
        not self.op.file_driver in constants.FILE_DRIVER):
7890
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7891
                                 self.op.file_driver, errors.ECODE_INVAL)
7892

    
7893
    if self.op.disk_template == constants.DT_FILE:
7894
      opcodes.RequireFileStorage()
7895
    elif self.op.disk_template == constants.DT_SHARED_FILE:
7896
      opcodes.RequireSharedFileStorage()
7897

    
7898
    ### Node/iallocator related checks
7899
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7900

    
7901
    if self.op.pnode is not None:
7902
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7903
        if self.op.snode is None:
7904
          raise errors.OpPrereqError("The networked disk templates need"
7905
                                     " a mirror node", errors.ECODE_INVAL)
7906
      elif self.op.snode:
7907
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7908
                        " template")
7909
        self.op.snode = None
7910

    
7911
    self._cds = _GetClusterDomainSecret()
7912

    
7913
    if self.op.mode == constants.INSTANCE_IMPORT:
7914
      # On import force_variant must be True, because if we forced it at
7915
      # initial install, our only chance when importing it back is that it
7916
      # works again!
7917
      self.op.force_variant = True
7918

    
7919
      if self.op.no_install:
7920
        self.LogInfo("No-installation mode has no effect during import")
7921

    
7922
    elif self.op.mode == constants.INSTANCE_CREATE:
7923
      if self.op.os_type is None:
7924
        raise errors.OpPrereqError("No guest OS specified",
7925
                                   errors.ECODE_INVAL)
7926
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7927
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7928
                                   " installation" % self.op.os_type,
7929
                                   errors.ECODE_STATE)
7930
      if self.op.disk_template is None:
7931
        raise errors.OpPrereqError("No disk template specified",
7932
                                   errors.ECODE_INVAL)
7933

    
7934
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7935
      # Check handshake to ensure both clusters have the same domain secret
7936
      src_handshake = self.op.source_handshake
7937
      if not src_handshake:
7938
        raise errors.OpPrereqError("Missing source handshake",
7939
                                   errors.ECODE_INVAL)
7940

    
7941
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7942
                                                           src_handshake)
7943
      if errmsg:
7944
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7945
                                   errors.ECODE_INVAL)
7946

    
7947
      # Load and check source CA
7948
      self.source_x509_ca_pem = self.op.source_x509_ca
7949
      if not self.source_x509_ca_pem:
7950
        raise errors.OpPrereqError("Missing source X509 CA",
7951
                                   errors.ECODE_INVAL)
7952

    
7953
      try:
7954
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7955
                                                    self._cds)
7956
      except OpenSSL.crypto.Error, err:
7957
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7958
                                   (err, ), errors.ECODE_INVAL)
7959

    
7960
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7961
      if errcode is not None:
7962
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7963
                                   errors.ECODE_INVAL)
7964

    
7965
      self.source_x509_ca = cert
7966

    
7967
      src_instance_name = self.op.source_instance_name
7968
      if not src_instance_name:
7969
        raise errors.OpPrereqError("Missing source instance name",
7970
                                   errors.ECODE_INVAL)
7971

    
7972
      self.source_instance_name = \
7973
          netutils.GetHostname(name=src_instance_name).name
7974

    
7975
    else:
7976
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7977
                                 self.op.mode, errors.ECODE_INVAL)
7978

    
7979
  def ExpandNames(self):
7980
    """ExpandNames for CreateInstance.
7981

7982
    Figure out the right locks for instance creation.
7983

7984
    """
7985
    self.needed_locks = {}
7986

    
7987
    instance_name = self.op.instance_name
7988
    # this is just a preventive check, but someone might still add this
7989
    # instance in the meantime, and creation will fail at lock-add time
7990
    if instance_name in self.cfg.GetInstanceList():
7991
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7992
                                 instance_name, errors.ECODE_EXISTS)
7993

    
7994
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7995

    
7996
    if self.op.iallocator:
7997
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7998
    else:
7999
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8000
      nodelist = [self.op.pnode]
8001
      if self.op.snode is not None:
8002
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8003
        nodelist.append(self.op.snode)
8004
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8005

    
8006
    # in case of import lock the source node too
8007
    if self.op.mode == constants.INSTANCE_IMPORT:
8008
      src_node = self.op.src_node
8009
      src_path = self.op.src_path
8010

    
8011
      if src_path is None:
8012
        self.op.src_path = src_path = self.op.instance_name
8013

    
8014
      if src_node is None:
8015
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8016
        self.op.src_node = None
8017
        if os.path.isabs(src_path):
8018
          raise errors.OpPrereqError("Importing an instance from an absolute"
8019
                                     " path requires a source node option",
8020
                                     errors.ECODE_INVAL)
8021
      else:
8022
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8023
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8024
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8025
        if not os.path.isabs(src_path):
8026
          self.op.src_path = src_path = \
8027
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8028

    
8029
  def _RunAllocator(self):
8030
    """Run the allocator based on input opcode.
8031

8032
    """
8033
    nics = [n.ToDict() for n in self.nics]
8034
    ial = IAllocator(self.cfg, self.rpc,
8035
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8036
                     name=self.op.instance_name,
8037
                     disk_template=self.op.disk_template,
8038
                     tags=self.op.tags,
8039
                     os=self.op.os_type,
8040
                     vcpus=self.be_full[constants.BE_VCPUS],
8041
                     memory=self.be_full[constants.BE_MEMORY],
8042
                     disks=self.disks,
8043
                     nics=nics,
8044
                     hypervisor=self.op.hypervisor,
8045
                     )
8046

    
8047
    ial.Run(self.op.iallocator)
8048

    
8049
    if not ial.success:
8050
      raise errors.OpPrereqError("Can't compute nodes using"
8051
                                 " iallocator '%s': %s" %
8052
                                 (self.op.iallocator, ial.info),
8053
                                 errors.ECODE_NORES)
8054
    if len(ial.result) != ial.required_nodes:
8055
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8056
                                 " of nodes (%s), required %s" %
8057
                                 (self.op.iallocator, len(ial.result),
8058
                                  ial.required_nodes), errors.ECODE_FAULT)
8059
    self.op.pnode = ial.result[0]
8060
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8061
                 self.op.instance_name, self.op.iallocator,
8062
                 utils.CommaJoin(ial.result))
8063
    if ial.required_nodes == 2:
8064
      self.op.snode = ial.result[1]
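      # Hypothetical example: for a DRBD8 instance the allocator might return
      # ial.result == ["node1.example.com", "node2.example.com"], in which
      # case the first entry becomes the primary node and the second the
      # secondary.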
8065

    
8066
  def BuildHooksEnv(self):
8067
    """Build hooks env.
8068

8069
    This runs on master, primary and secondary nodes of the instance.
8070

8071
    """
8072
    env = {
8073
      "ADD_MODE": self.op.mode,
8074
      }
8075
    if self.op.mode == constants.INSTANCE_IMPORT:
8076
      env["SRC_NODE"] = self.op.src_node
8077
      env["SRC_PATH"] = self.op.src_path
8078
      env["SRC_IMAGES"] = self.src_images
8079

    
8080
    env.update(_BuildInstanceHookEnv(
8081
      name=self.op.instance_name,
8082
      primary_node=self.op.pnode,
8083
      secondary_nodes=self.secondaries,
8084
      status=self.op.start,
8085
      os_type=self.op.os_type,
8086
      memory=self.be_full[constants.BE_MEMORY],
8087
      vcpus=self.be_full[constants.BE_VCPUS],
8088
      nics=_NICListToTuple(self, self.nics),
8089
      disk_template=self.op.disk_template,
8090
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8091
             for d in self.disks],
8092
      bep=self.be_full,
8093
      hvp=self.hv_full,
8094
      hypervisor_name=self.op.hypervisor,
8095
      tags=self.op.tags,
8096
    ))
8097

    
8098
    return env
8099

    
8100
  def BuildHooksNodes(self):
8101
    """Build hooks nodes.
8102

8103
    """
8104
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8105
    return nl, nl
8106

    
8107
  def _ReadExportInfo(self):
8108
    """Reads the export information from disk.
8109

8110
    It will override the opcode source node and path with the actual
8111
    information, if these two were not specified before.
8112

8113
    @return: the export information
8114

8115
    """
8116
    assert self.op.mode == constants.INSTANCE_IMPORT
8117

    
8118
    src_node = self.op.src_node
8119
    src_path = self.op.src_path
8120

    
8121
    if src_node is None:
8122
      locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8123
      exp_list = self.rpc.call_export_list(locked_nodes)
8124
      found = False
8125
      for node in exp_list:
8126
        if exp_list[node].fail_msg:
8127
          continue
8128
        if src_path in exp_list[node].payload:
8129
          found = True
8130
          self.op.src_node = src_node = node
8131
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8132
                                                       src_path)
8133
          break
8134
      if not found:
8135
        raise errors.OpPrereqError("No export found for relative path %s" %
8136
                                    src_path, errors.ECODE_INVAL)
8137

    
8138
    _CheckNodeOnline(self, src_node)
8139
    result = self.rpc.call_export_info(src_node, src_path)
8140
    result.Raise("No export or invalid export found in dir %s" % src_path)
8141

    
8142
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8143
    if not export_info.has_section(constants.INISECT_EXP):
8144
      raise errors.ProgrammerError("Corrupted export config",
8145
                                   errors.ECODE_ENVIRON)
8146

    
8147
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8148
    if (int(ei_version) != constants.EXPORT_VERSION):
8149
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8150
                                 (ei_version, constants.EXPORT_VERSION),
8151
                                 errors.ECODE_ENVIRON)
8152
    return export_info
8153

    
8154
  def _ReadExportParams(self, einfo):
8155
    """Use export parameters as defaults.
8156

8157
    In case the opcode doesn't specify (as in override) some instance
8158
    parameters, then try to use them from the export information, if
8159
    that declares them.
8160

8161
    """
8162
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8163

    
8164
    if self.op.disk_template is None:
8165
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8166
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8167
                                          "disk_template")
8168
      else:
8169
        raise errors.OpPrereqError("No disk template specified and the export"
8170
                                   " is missing the disk_template information",
8171
                                   errors.ECODE_INVAL)
8172

    
8173
    if not self.op.disks:
8174
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8175
        disks = []
8176
        # TODO: import the disk iv_name too
8177
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8178
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8179
          disks.append({constants.IDISK_SIZE: disk_sz})
8180
        self.op.disks = disks
8181
      else:
8182
        raise errors.OpPrereqError("No disk info specified and the export"
8183
                                   " is missing the disk information",
8184
                                   errors.ECODE_INVAL)
8185

    
8186
    if (not self.op.nics and
8187
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8188
      nics = []
8189
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8190
        ndict = {}
8191
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8192
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8193
          ndict[name] = v
8194
        nics.append(ndict)
8195
      self.op.nics = nics
8196

    
8197
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8198
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8199

    
8200
    if (self.op.hypervisor is None and
8201
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8202
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8203

    
8204
    if einfo.has_section(constants.INISECT_HYP):
8205
      # use the export parameters but do not override the ones
8206
      # specified by the user
8207
      for name, value in einfo.items(constants.INISECT_HYP):
8208
        if name not in self.op.hvparams:
8209
          self.op.hvparams[name] = value
8210

    
8211
    if einfo.has_section(constants.INISECT_BEP):
8212
      # use the parameters, without overriding
8213
      for name, value in einfo.items(constants.INISECT_BEP):
8214
        if name not in self.op.beparams:
8215
          self.op.beparams[name] = value
8216
    else:
8217
      # try to read the parameters old style, from the main section
8218
      for name in constants.BES_PARAMETERS:
8219
        if (name not in self.op.beparams and
8220
            einfo.has_option(constants.INISECT_INS, name)):
8221
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8222

    
8223
    if einfo.has_section(constants.INISECT_OSP):
8224
      # use the parameters, without overriding
8225
      for name, value in einfo.items(constants.INISECT_OSP):
8226
        if name not in self.op.osparams:
8227
          self.op.osparams[name] = value
8228

    
8229
  def _RevertToDefaults(self, cluster):
8230
    """Revert the instance parameters to the default values.
8231

8232
    """
8233
    # hvparams
8234
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8235
    for name in self.op.hvparams.keys():
8236
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8237
        del self.op.hvparams[name]
8238
    # beparams
8239
    be_defs = cluster.SimpleFillBE({})
8240
    for name in self.op.beparams.keys():
8241
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8242
        del self.op.beparams[name]
8243
    # nic params
8244
    nic_defs = cluster.SimpleFillNIC({})
8245
    for nic in self.op.nics:
8246
      for name in constants.NICS_PARAMETERS:
8247
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8248
          del nic[name]
8249
    # osparams
8250
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8251
    for name in self.op.osparams.keys():
8252
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8253
        del self.op.osparams[name]
8254

    
8255
  def _CalculateFileStorageDir(self):
8256
    """Calculate final instance file storage dir.
8257

8258
    """
8259
    # file storage dir calculation/check
8260
    self.instance_file_storage_dir = None
8261
    if self.op.disk_template in constants.DTS_FILEBASED:
8262
      # build the full file storage dir path
8263
      joinargs = []
8264

    
8265
      if self.op.disk_template == constants.DT_SHARED_FILE:
8266
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8267
      else:
8268
        get_fsd_fn = self.cfg.GetFileStorageDir
8269

    
8270
      cfg_storagedir = get_fsd_fn()
8271
      if not cfg_storagedir:
8272
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8273
      joinargs.append(cfg_storagedir)
8274

    
8275
      if self.op.file_storage_dir is not None:
8276
        joinargs.append(self.op.file_storage_dir)
8277

    
8278
      joinargs.append(self.op.instance_name)
8279

    
8280
      # pylint: disable-msg=W0142
8281
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8282

    
8283
  def CheckPrereq(self):
8284
    """Check prerequisites.
8285

8286
    """
8287
    self._CalculateFileStorageDir()
8288

    
8289
    if self.op.mode == constants.INSTANCE_IMPORT:
8290
      export_info = self._ReadExportInfo()
8291
      self._ReadExportParams(export_info)
8292

    
8293
    if (not self.cfg.GetVGName() and
8294
        self.op.disk_template not in constants.DTS_NOT_LVM):
8295
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8296
                                 " instances", errors.ECODE_STATE)
8297

    
8298
    if self.op.hypervisor is None:
8299
      self.op.hypervisor = self.cfg.GetHypervisorType()
8300

    
8301
    cluster = self.cfg.GetClusterInfo()
8302
    enabled_hvs = cluster.enabled_hypervisors
8303
    if self.op.hypervisor not in enabled_hvs:
8304
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8305
                                 " cluster (%s)" % (self.op.hypervisor,
8306
                                  ",".join(enabled_hvs)),
8307
                                 errors.ECODE_STATE)
8308

    
8309
    # Check tag validity
8310
    for tag in self.op.tags:
8311
      objects.TaggableObject.ValidateTag(tag)
8312

    
8313
    # check hypervisor parameter syntax (locally)
8314
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8315
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8316
                                      self.op.hvparams)
8317
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8318
    hv_type.CheckParameterSyntax(filled_hvp)
8319
    self.hv_full = filled_hvp
8320
    # check that we don't specify global parameters on an instance
8321
    _CheckGlobalHvParams(self.op.hvparams)
8322

    
8323
    # fill and remember the beparams dict
8324
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8325
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8326

    
8327
    # build os parameters
8328
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8329

    
8330
    # now that hvp/bep are in final format, let's reset to defaults,
8331
    # if told to do so
8332
    if self.op.identify_defaults:
8333
      self._RevertToDefaults(cluster)
8334

    
8335
    # NIC buildup
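    # each entry of self.op.nics is a dict of INIC_* values; validate the
    # mode, IP, MAC and link and turn it into an objects.NIC object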
8336
    self.nics = []
8337
    for idx, nic in enumerate(self.op.nics):
8338
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8339
      nic_mode = nic_mode_req
8340
      if nic_mode is None:
8341
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8342

    
8343
      # in routed mode, for the first nic, the default ip is 'auto'
8344
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8345
        default_ip_mode = constants.VALUE_AUTO
8346
      else:
8347
        default_ip_mode = constants.VALUE_NONE
8348

    
8349
      # ip validity checks
8350
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8351
      if ip is None or ip.lower() == constants.VALUE_NONE:
8352
        nic_ip = None
8353
      elif ip.lower() == constants.VALUE_AUTO:
8354
        if not self.op.name_check:
8355
          raise errors.OpPrereqError("IP address set to auto but name checks"
8356
                                     " have been skipped",
8357
                                     errors.ECODE_INVAL)
8358
        nic_ip = self.hostname1.ip
8359
      else:
8360
        if not netutils.IPAddress.IsValid(ip):
8361
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8362
                                     errors.ECODE_INVAL)
8363
        nic_ip = ip
8364

    
8365
      # TODO: check the ip address for uniqueness
8366
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8367
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8368
                                   errors.ECODE_INVAL)
8369

    
8370
      # MAC address verification
8371
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8372
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8373
        mac = utils.NormalizeAndValidateMac(mac)
8374

    
8375
        try:
8376
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8377
        except errors.ReservationError:
8378
          raise errors.OpPrereqError("MAC address %s already in use"
8379
                                     " in cluster" % mac,
8380
                                     errors.ECODE_NOTUNIQUE)
8381

    
8382
      #  Build nic parameters
8383
      link = nic.get(constants.INIC_LINK, None)
8384
      nicparams = {}
8385
      if nic_mode_req:
8386
        nicparams[constants.NIC_MODE] = nic_mode_req
8387
      if link:
8388
        nicparams[constants.NIC_LINK] = link
8389

    
8390
      check_params = cluster.SimpleFillNIC(nicparams)
8391
      objects.NIC.CheckParameterSyntax(check_params)
8392
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8393

    
8394
    # disk checks/pre-build
8395
    default_vg = self.cfg.GetVGName()
8396
    self.disks = []
8397
    for disk in self.op.disks:
8398
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8399
      if mode not in constants.DISK_ACCESS_SET:
8400
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8401
                                   mode, errors.ECODE_INVAL)
8402
      size = disk.get(constants.IDISK_SIZE, None)
8403
      if size is None:
8404
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8405
      try:
8406
        size = int(size)
8407
      except (TypeError, ValueError):
8408
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8409
                                   errors.ECODE_INVAL)
8410

    
8411
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8412
      new_disk = {
8413
        constants.IDISK_SIZE: size,
8414
        constants.IDISK_MODE: mode,
8415
        constants.IDISK_VG: data_vg,
8416
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8417
        }
8418
      if constants.IDISK_ADOPT in disk:
8419
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8420
      self.disks.append(new_disk)
8421

    
8422
    if self.op.mode == constants.INSTANCE_IMPORT:
8423

    
8424
      # Check that the new instance doesn't have less disks than the export
8425
      instance_disks = len(self.disks)
8426
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8427
      if instance_disks < export_disks:
8428
        raise errors.OpPrereqError("Not enough disks to import."
8429
                                   " (instance: %d, export: %d)" %
8430
                                   (instance_disks, export_disks),
8431
                                   errors.ECODE_INVAL)
8432

    
8433
      disk_images = []
8434
      for idx in range(export_disks):
8435
        option = 'disk%d_dump' % idx
8436
        if export_info.has_option(constants.INISECT_INS, option):
8437
          # FIXME: are the old os-es, disk sizes, etc. useful?
8438
          export_name = export_info.get(constants.INISECT_INS, option)
8439
          image = utils.PathJoin(self.op.src_path, export_name)
8440
          disk_images.append(image)
8441
        else:
8442
          disk_images.append(False)
8443

    
8444
      self.src_images = disk_images
8445

    
8446
      old_name = export_info.get(constants.INISECT_INS, 'name')
8447
      try:
8448
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8449
      except (TypeError, ValueError), err:
8450
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8451
                                   " an integer: %s" % str(err),
8452
                                   errors.ECODE_STATE)
8453
      if self.op.instance_name == old_name:
8454
        for idx, nic in enumerate(self.nics):
8455
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8456
            nic_mac_ini = 'nic%d_mac' % idx
8457
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8458

    
8459
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8460

    
8461
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8462
    if self.op.ip_check:
8463
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8464
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8465
                                   (self.check_ip, self.op.instance_name),
8466
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating the MAC address here, both the allocator and the hooks
    # get the real final MAC address rather than the 'auto' or 'generate'
    # value. There is a race condition between the generation and the
    # instance object creation, which means that we know the MAC is valid
    # now, but we're not sure it will still be when we actually add the
    # instance. If things go wrong, adding the instance will abort because
    # of a duplicate MAC, and the creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()
8484

    
8485
    #### node related checks
8486

    
8487
    # check primary node
8488
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8489
    assert self.pnode is not None, \
8490
      "Cannot retrieve locked node %s" % self.op.pnode
8491
    if pnode.offline:
8492
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8493
                                 pnode.name, errors.ECODE_STATE)
8494
    if pnode.drained:
8495
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8496
                                 pnode.name, errors.ECODE_STATE)
8497
    if not pnode.vm_capable:
8498
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8499
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8500

    
8501
    self.secondaries = []
8502

    
8503
    # mirror node verification
8504
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8505
      if self.op.snode == pnode.name:
8506
        raise errors.OpPrereqError("The secondary node cannot be the"
8507
                                   " primary node", errors.ECODE_INVAL)
8508
      _CheckNodeOnline(self, self.op.snode)
8509
      _CheckNodeNotDrained(self, self.op.snode)
8510
      _CheckNodeVmCapable(self, self.op.snode)
8511
      self.secondaries.append(self.op.snode)
8512

    
8513
    nodenames = [pnode.name] + self.secondaries
8514

    
8515
    if not self.adopt_disks:
8516
      # Check lv size requirements, if not adopting
8517
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8518
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8519

    
8520
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8521
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8522
                                disk[constants.IDISK_ADOPT])
8523
                     for disk in self.disks])
8524
      if len(all_lvs) != len(self.disks):
8525
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8526
                                   errors.ECODE_INVAL)
8527
      for lv_name in all_lvs:
8528
        try:
8529
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8530
          # to ReserveLV uses the same syntax
8531
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8532
        except errors.ReservationError:
8533
          raise errors.OpPrereqError("LV named %s used by another instance" %
8534
                                     lv_name, errors.ECODE_NOTUNIQUE)
8535

    
8536
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8537
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8538

    
8539
      node_lvs = self.rpc.call_lv_list([pnode.name],
8540
                                       vg_names.payload.keys())[pnode.name]
8541
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8542
      node_lvs = node_lvs.payload
8543

    
8544
      delta = all_lvs.difference(node_lvs.keys())
8545
      if delta:
8546
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8547
                                   utils.CommaJoin(delta),
8548
                                   errors.ECODE_INVAL)
8549
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8550
      if online_lvs:
8551
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8552
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8553
                                   errors.ECODE_STATE)
8554
      # update the size of disk based on what is found
8555
      for dsk in self.disks:
8556
        dsk[constants.IDISK_SIZE] = \
8557
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8558
                                        dsk[constants.IDISK_ADOPT])][0]))
8559

    
8560
    elif self.op.disk_template == constants.DT_BLOCK:
8561
      # Normalize and de-duplicate device paths
8562
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8563
                       for disk in self.disks])
8564
      if len(all_disks) != len(self.disks):
8565
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8566
                                   errors.ECODE_INVAL)
8567
      baddisks = [d for d in all_disks
8568
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8569
      if baddisks:
8570
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8571
                                   " cannot be adopted" %
8572
                                   (", ".join(baddisks),
8573
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8574
                                   errors.ECODE_INVAL)
8575

    
8576
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8577
                                            list(all_disks))[pnode.name]
8578
      node_disks.Raise("Cannot get block device information from node %s" %
8579
                       pnode.name)
8580
      node_disks = node_disks.payload
8581
      delta = all_disks.difference(node_disks.keys())
8582
      if delta:
8583
        raise errors.OpPrereqError("Missing block device(s): %s" %
8584
                                   utils.CommaJoin(delta),
8585
                                   errors.ECODE_INVAL)
8586
      for dsk in self.disks:
8587
        dsk[constants.IDISK_SIZE] = \
8588
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8589

    
8590
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8591

    
8592
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8593
    # check OS parameters (remotely)
8594
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8595

    
8596
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8597

    
8598
    # memory check on primary node
8599
    if self.op.start:
8600
      _CheckNodeFreeMemory(self, self.pnode.name,
8601
                           "creating instance %s" % self.op.instance_name,
8602
                           self.be_full[constants.BE_MEMORY],
8603
                           self.op.hypervisor)
8604

    
8605
    self.dry_run_result = list(nodenames)
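    # (for dry-run invocations this node list is returned to the caller
    # instead of running Exec)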
8606

    
8607
  def Exec(self, feedback_fn):
8608
    """Create and add the instance to the cluster.
8609

8610
    """
8611
    instance = self.op.instance_name
8612
    pnode_name = self.pnode.name
8613

    
8614
    ht_kind = self.op.hypervisor
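    # hypervisors in HTS_REQ_PORT need a cluster-wide unique network port
    # (e.g. for the VNC console), so reserve one now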
8615
    if ht_kind in constants.HTS_REQ_PORT:
8616
      network_port = self.cfg.AllocatePort()
8617
    else:
8618
      network_port = None
8619

    
8620
    disks = _GenerateDiskTemplate(self,
8621
                                  self.op.disk_template,
8622
                                  instance, pnode_name,
8623
                                  self.secondaries,
8624
                                  self.disks,
8625
                                  self.instance_file_storage_dir,
8626
                                  self.op.file_driver,
8627
                                  0,
8628
                                  feedback_fn)
8629

    
8630
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8631
                            primary_node=pnode_name,
8632
                            nics=self.nics, disks=disks,
8633
                            disk_template=self.op.disk_template,
8634
                            admin_up=False,
8635
                            network_port=network_port,
8636
                            beparams=self.op.beparams,
8637
                            hvparams=self.op.hvparams,
8638
                            hypervisor=self.op.hypervisor,
8639
                            osparams=self.op.osparams,
8640
                            )
8641

    
8642
    if self.op.tags:
8643
      for tag in self.op.tags:
8644
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
8660
      feedback_fn("* creating instance disks...")
8661
      try:
8662
        _CreateDisks(self, iobj)
8663
      except errors.OpExecError:
8664
        self.LogWarning("Device creation failed, reverting...")
8665
        try:
8666
          _RemoveDisks(self, iobj)
8667
        finally:
8668
          self.cfg.ReleaseDRBDMinors(instance)
8669
          raise
8670

    
8671
    feedback_fn("adding instance %s to cluster config" % instance)
8672

    
8673
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8674

    
8675
    # Declare that we don't want to remove the instance lock anymore, as we've
8676
    # added the instance to the config
8677
    del self.remove_locks[locking.LEVEL_INSTANCE]
8678

    
8679
    if self.op.mode == constants.INSTANCE_IMPORT:
8680
      # Release unused nodes
8681
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
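      # (the source node lock is kept because the disk data is still
      # transferred from it below)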
8682
    else:
8683
      # Release all nodes
8684
      _ReleaseLocks(self, locking.LEVEL_NODE)
8685

    
8686
    disk_abort = False
8687
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8688
      feedback_fn("* wiping instance disks...")
8689
      try:
8690
        _WipeDisks(self, iobj)
8691
      except errors.OpExecError, err:
8692
        logging.exception("Wiping disks failed")
8693
        self.LogWarning("Wiping instance disks failed (%s)", err)
8694
        disk_abort = True
8695

    
8696
    if disk_abort:
8697
      # Something is already wrong with the disks, don't do anything else
8698
      pass
8699
    elif self.op.wait_for_sync:
8700
      disk_abort = not _WaitForSync(self, iobj)
8701
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8702
      # make sure the disks are not degraded (still sync-ing is ok)
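      # give the mirror some time to connect and start the initial resync
      # before the one-shot status check below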
8703
      time.sleep(15)
8704
      feedback_fn("* checking mirrors status")
8705
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8706
    else:
8707
      disk_abort = False
8708

    
8709
    if disk_abort:
8710
      _RemoveDisks(self, iobj)
8711
      self.cfg.RemoveInstance(iobj.name)
8712
      # Make sure the instance lock gets removed
8713
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8714
      raise errors.OpExecError("There are some degraded disks for"
8715
                               " this instance")
8716

    
8717
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8718
      if self.op.mode == constants.INSTANCE_CREATE:
8719
        if not self.op.no_install:
8720
          feedback_fn("* running the instance OS create scripts...")
8721
          # FIXME: pass debug option from opcode to backend
8722
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8723
                                                 self.op.debug_level)
8724
          result.Raise("Could not add os for instance %s"
8725
                       " on node %s" % (instance, pnode_name))
8726

    
8727
      elif self.op.mode == constants.INSTANCE_IMPORT:
8728
        feedback_fn("* running the instance OS import scripts...")
8729

    
8730
        transfers = []
8731

    
8732
        for idx, image in enumerate(self.src_images):
8733
          if not image:
8734
            continue
8735

    
8736
          # FIXME: pass debug option from opcode to backend
8737
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8738
                                             constants.IEIO_FILE, (image, ),
8739
                                             constants.IEIO_SCRIPT,
8740
                                             (iobj.disks[idx], idx),
8741
                                             None)
8742
          transfers.append(dt)
8743

    
8744
        import_result = \
8745
          masterd.instance.TransferInstanceData(self, feedback_fn,
8746
                                                self.op.src_node, pnode_name,
8747
                                                self.pnode.secondary_ip,
8748
                                                iobj, transfers)
8749
        if not compat.all(import_result):
8750
          self.LogWarning("Some disks for instance %s on node %s were not"
8751
                          " imported successfully" % (instance, pnode_name))
8752

    
8753
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8754
        feedback_fn("* preparing remote import...")
8755
        # The source cluster will stop the instance before attempting to make a
8756
        # connection. In some cases stopping an instance can take a long time,
8757
        # hence the shutdown timeout is added to the connection timeout.
8758
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8759
                           self.op.source_shutdown_timeout)
8760
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8761

    
8762
        assert iobj.primary_node == self.pnode.name
8763
        disk_results = \
8764
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8765
                                        self.source_x509_ca,
8766
                                        self._cds, timeouts)
8767
        if not compat.all(disk_results):
8768
          # TODO: Should the instance still be started, even if some disks
8769
          # failed to import (valid for local imports, too)?
8770
          self.LogWarning("Some disks for instance %s on node %s were not"
8771
                          " imported successfully" % (instance, pnode_name))
8772

    
8773
        # Run rename script on newly imported instance
8774
        assert iobj.name == instance
8775
        feedback_fn("Running rename script for %s" % instance)
8776
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8777
                                                   self.source_instance_name,
8778
                                                   self.op.debug_level)
8779
        if result.fail_msg:
8780
          self.LogWarning("Failed to run rename script for %s on node"
8781
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8782

    
8783
      else:
8784
        # also checked in the prereq part
8785
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8786
                                     % self.op.mode)
8787

    
8788
    if self.op.start:
8789
      iobj.admin_up = True
8790
      self.cfg.Update(iobj, feedback_fn)
8791
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8792
      feedback_fn("* starting instance...")
8793
      result = self.rpc.call_instance_start(pnode_name, iobj,
8794
                                            None, None, False)
8795
      result.Raise("Could not start instance")
8796

    
8797
    return list(iobj.all_nodes)
8798

    
8799

    
8800
class LUInstanceConsole(NoHooksLU):
8801
  """Connect to an instance's console.
8802

8803
  This is somewhat special in that it returns the command line that
8804
  you need to run on the master node in order to connect to the
8805
  console.
8806

8807
  """
8808
  REQ_BGL = False
8809

    
8810
  def ExpandNames(self):
8811
    self._ExpandAndLockInstance()
8812

    
8813
  def CheckPrereq(self):
8814
    """Check prerequisites.
8815

8816
    This checks that the instance is in the cluster.
8817

8818
    """
8819
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8820
    assert self.instance is not None, \
8821
      "Cannot retrieve locked instance %s" % self.op.instance_name
8822
    _CheckNodeOnline(self, self.instance.primary_node)
8823

    
8824
  def Exec(self, feedback_fn):
8825
    """Connect to the console of an instance
8826

8827
    """
8828
    instance = self.instance
8829
    node = instance.primary_node
8830

    
8831
    node_insts = self.rpc.call_instance_list([node],
8832
                                             [instance.hypervisor])[node]
8833
    node_insts.Raise("Can't get node information from %s" % node)
8834

    
8835
    if instance.name not in node_insts.payload:
8836
      if instance.admin_up:
8837
        state = constants.INSTST_ERRORDOWN
8838
      else:
8839
        state = constants.INSTST_ADMINDOWN
8840
      raise errors.OpExecError("Instance %s is not running (state %s)" %
8841
                               (instance.name, state))
8842

    
8843
    logging.debug("Connecting to console of %s on %s", instance.name, node)
8844

    
8845
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
8870
  """Replace the disks of an instance.
8871

8872
  """
8873
  HPATH = "mirrors-replace"
8874
  HTYPE = constants.HTYPE_INSTANCE
8875
  REQ_BGL = False
8876

    
8877
  def CheckArguments(self):
8878
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8879
                                  self.op.iallocator)
8880

    
8881
  def ExpandNames(self):
8882
    self._ExpandAndLockInstance()
8883

    
8884
    assert locking.LEVEL_NODE not in self.needed_locks
8885
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
8886

    
8887
    assert self.op.iallocator is None or self.op.remote_node is None, \
8888
      "Conflicting options"
8889

    
8890
    if self.op.remote_node is not None:
8891
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8892

    
8893
      # Warning: do not remove the locking of the new secondary here
8894
      # unless DRBD8.AddChildren is changed to work in parallel;
8895
      # currently it doesn't since parallel invocations of
8896
      # FindUnusedMinor will conflict
8897
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8898
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8899
    else:
8900
      self.needed_locks[locking.LEVEL_NODE] = []
8901
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8902

    
8903
      if self.op.iallocator is not None:
8904
        # iallocator will select a new node in the same group
8905
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
8906

    
8907
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8908
                                   self.op.iallocator, self.op.remote_node,
8909
                                   self.op.disks, False, self.op.early_release)
8910

    
8911
    self.tasklets = [self.replacer]
8912

    
8913
  def DeclareLocks(self, level):
8914
    if level == locking.LEVEL_NODEGROUP:
8915
      assert self.op.remote_node is None
8916
      assert self.op.iallocator is not None
8917
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8918

    
8919
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
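      # the node groups are locked in shared mode; their member nodes are
      # locked later at the LEVEL_NODE level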
8920
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
8921
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8922

    
8923
    elif level == locking.LEVEL_NODE:
8924
      if self.op.iallocator is not None:
8925
        assert self.op.remote_node is None
8926
        assert not self.needed_locks[locking.LEVEL_NODE]
8927

    
8928
        # Lock member nodes of all locked groups
8929
        self.needed_locks[locking.LEVEL_NODE] = [node_name
8930
          for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8931
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8932
      else:
8933
        self._LockInstancesNodes()
8934

    
8935
  def BuildHooksEnv(self):
8936
    """Build hooks env.
8937

8938
    This runs on the master, the primary and all the secondaries.
8939

8940
    """
8941
    instance = self.replacer.instance
8942
    env = {
8943
      "MODE": self.op.mode,
8944
      "NEW_SECONDARY": self.op.remote_node,
8945
      "OLD_SECONDARY": instance.secondary_nodes[0],
8946
      }
8947
    env.update(_BuildInstanceHookEnvByObject(self, instance))
8948
    return env
8949

    
8950
  def BuildHooksNodes(self):
8951
    """Build hooks nodes.
8952

8953
    """
8954
    instance = self.replacer.instance
8955
    nl = [
8956
      self.cfg.GetMasterNode(),
8957
      instance.primary_node,
8958
      ]
8959
    if self.op.remote_node is not None:
8960
      nl.append(self.op.remote_node)
8961
    return nl, nl
8962

    
8963
  def CheckPrereq(self):
8964
    """Check prerequisites.
8965

8966
    """
8967
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8968
            self.op.iallocator is None)
8969

    
8970
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8971
    if owned_groups:
8972
      groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8973
      if owned_groups != groups:
8974
        raise errors.OpExecError("Node groups used by instance '%s' changed"
8975
                                 " since lock was acquired, current list is %r,"
8976
                                 " used to be '%s'" %
8977
                                 (self.op.instance_name,
8978
                                  utils.CommaJoin(groups),
8979
                                  utils.CommaJoin(owned_groups)))
8980

    
8981
    return LogicalUnit.CheckPrereq(self)
8982

    
8983

    
8984
class TLReplaceDisks(Tasklet):
8985
  """Replaces disks for an instance.
8986

8987
  Note: Locking is not within the scope of this class.
8988

8989
  """
8990
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8991
               disks, delay_iallocator, early_release):
8992
    """Initializes this class.
8993

8994
    """
8995
    Tasklet.__init__(self, lu)
8996

    
8997
    # Parameters
8998
    self.instance_name = instance_name
8999
    self.mode = mode
9000
    self.iallocator_name = iallocator_name
9001
    self.remote_node = remote_node
9002
    self.disks = disks
9003
    self.delay_iallocator = delay_iallocator
9004
    self.early_release = early_release
9005

    
9006
    # Runtime data
9007
    self.instance = None
9008
    self.new_node = None
9009
    self.target_node = None
9010
    self.other_node = None
9011
    self.remote_node_info = None
9012
    self.node_secondary_ip = None
9013

    
9014
  @staticmethod
9015
  def CheckArguments(mode, remote_node, iallocator):
9016
    """Helper function for users of this class.
9017

9018
    """
9019
    # check for valid parameter combination
9020
    if mode == constants.REPLACE_DISK_CHG:
9021
      if remote_node is None and iallocator is None:
9022
        raise errors.OpPrereqError("When changing the secondary either an"
9023
                                   " iallocator script must be used or the"
9024
                                   " new node given", errors.ECODE_INVAL)
9025

    
9026
      if remote_node is not None and iallocator is not None:
9027
        raise errors.OpPrereqError("Give either the iallocator or the new"
9028
                                   " secondary, not both", errors.ECODE_INVAL)
9029

    
9030
    elif remote_node is not None or iallocator is not None:
9031
      # Not replacing the secondary
9032
      raise errors.OpPrereqError("The iallocator and new node options can"
9033
                                 " only be used when changing the"
9034
                                 " secondary node", errors.ECODE_INVAL)
9035

    
9036
  @staticmethod
9037
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9038
    """Compute a new secondary node using an IAllocator.
9039

9040
    """
9041
    ial = IAllocator(lu.cfg, lu.rpc,
9042
                     mode=constants.IALLOCATOR_MODE_RELOC,
9043
                     name=instance_name,
9044
                     relocate_from=relocate_from)
9045

    
9046
    ial.Run(iallocator_name)
9047

    
9048
    if not ial.success:
9049
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9050
                                 " %s" % (iallocator_name, ial.info),
9051
                                 errors.ECODE_NORES)
9052

    
9053
    if len(ial.result) != ial.required_nodes:
9054
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9055
                                 " of nodes (%s), required %s" %
9056
                                 (iallocator_name,
9057
                                  len(ial.result), ial.required_nodes),
9058
                                 errors.ECODE_FAULT)
9059

    
9060
    remote_node_name = ial.result[0]
9061

    
9062
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9063
               instance_name, remote_node_name)
9064

    
9065
    return remote_node_name
9066

    
9067
  def _FindFaultyDisks(self, node_name):
9068
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9069
                                    node_name, True)
9070

    
9071
  def _CheckDisksActivated(self, instance):
9072
    """Checks if the instance disks are activated.
9073

9074
    @param instance: The instance to check disks
9075
    @return: True if they are activated, False otherwise
9076

9077
    """
9078
    nodes = instance.all_nodes
9079

    
9080
    for idx, dev in enumerate(instance.disks):
9081
      for node in nodes:
9082
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9083
        self.cfg.SetDiskID(dev, node)
9084

    
9085
        result = self.rpc.call_blockdev_find(node, dev)
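        # offline nodes are skipped; any other failure or a missing device
        # means the disks are not (fully) activated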
9086

    
9087
        if result.offline:
9088
          continue
9089
        elif result.fail_msg or not result.payload:
9090
          return False
9091

    
9092
    return True
9093

    
9094
  def CheckPrereq(self):
9095
    """Check prerequisites.
9096

9097
    This checks that the instance is in the cluster.
9098

9099
    """
9100
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9101
    assert instance is not None, \
9102
      "Cannot retrieve locked instance %s" % self.instance_name
9103

    
9104
    if instance.disk_template != constants.DT_DRBD8:
9105
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9106
                                 " instances", errors.ECODE_INVAL)
9107

    
9108
    if len(instance.secondary_nodes) != 1:
9109
      raise errors.OpPrereqError("The instance has a strange layout,"
9110
                                 " expected one secondary but found %d" %
9111
                                 len(instance.secondary_nodes),
9112
                                 errors.ECODE_FAULT)
9113

    
9114
    if not self.delay_iallocator:
9115
      self._CheckPrereq2()
9116

    
9117
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This logically belongs in CheckPrereq, but it was separated and is now
    called from Exec as well, because during node evacuation the iallocator
    was otherwise only called with an unmodified cluster model, not taking
    planned changes into account.

    """
9126
    instance = self.instance
9127
    secondary_node = instance.secondary_nodes[0]
9128

    
9129
    if self.iallocator_name is None:
9130
      remote_node = self.remote_node
9131
    else:
9132
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9133
                                       instance.name, instance.secondary_nodes)
9134

    
9135
    if remote_node is None:
9136
      self.remote_node_info = None
9137
    else:
9138
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9139
             "Remote node '%s' is not locked" % remote_node
9140

    
9141
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9142
      assert self.remote_node_info is not None, \
9143
        "Cannot retrieve locked node %s" % remote_node
9144

    
9145
    if remote_node == self.instance.primary_node:
9146
      raise errors.OpPrereqError("The specified node is the primary node of"
9147
                                 " the instance", errors.ECODE_INVAL)
9148

    
9149
    if remote_node == secondary_node:
9150
      raise errors.OpPrereqError("The specified node is already the"
9151
                                 " secondary node of the instance",
9152
                                 errors.ECODE_INVAL)
9153

    
9154
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9155
                                    constants.REPLACE_DISK_CHG):
9156
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9157
                                 errors.ECODE_INVAL)
9158

    
9159
    if self.mode == constants.REPLACE_DISK_AUTO:
9160
      if not self._CheckDisksActivated(instance):
9161
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9162
                                   " first" % self.instance_name,
9163
                                   errors.ECODE_STATE)
9164
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9165
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9166

    
9167
      if faulty_primary and faulty_secondary:
9168
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9169
                                   " one node and can not be repaired"
9170
                                   " automatically" % self.instance_name,
9171
                                   errors.ECODE_STATE)
9172

    
9173
      if faulty_primary:
9174
        self.disks = faulty_primary
9175
        self.target_node = instance.primary_node
9176
        self.other_node = secondary_node
9177
        check_nodes = [self.target_node, self.other_node]
9178
      elif faulty_secondary:
9179
        self.disks = faulty_secondary
9180
        self.target_node = secondary_node
9181
        self.other_node = instance.primary_node
9182
        check_nodes = [self.target_node, self.other_node]
9183
      else:
9184
        self.disks = []
9185
        check_nodes = []
9186

    
9187
    else:
9188
      # Non-automatic modes
9189
      if self.mode == constants.REPLACE_DISK_PRI:
9190
        self.target_node = instance.primary_node
9191
        self.other_node = secondary_node
9192
        check_nodes = [self.target_node, self.other_node]
9193

    
9194
      elif self.mode == constants.REPLACE_DISK_SEC:
9195
        self.target_node = secondary_node
9196
        self.other_node = instance.primary_node
9197
        check_nodes = [self.target_node, self.other_node]
9198

    
9199
      elif self.mode == constants.REPLACE_DISK_CHG:
9200
        self.new_node = remote_node
9201
        self.other_node = instance.primary_node
9202
        self.target_node = secondary_node
9203
        check_nodes = [self.new_node, self.other_node]
9204

    
9205
        _CheckNodeNotDrained(self.lu, remote_node)
9206
        _CheckNodeVmCapable(self.lu, remote_node)
9207

    
9208
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9209
        assert old_node_info is not None
9210
        if old_node_info.offline and not self.early_release:
9211
          # doesn't make sense to delay the release
9212
          self.early_release = True
9213
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9214
                          " early-release mode", secondary_node)
9215

    
9216
      else:
9217
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9218
                                     self.mode)
9219

    
9220
      # If not specified all disks should be replaced
9221
      if not self.disks:
9222
        self.disks = range(len(self.instance.disks))
9223

    
9224
    for node in check_nodes:
9225
      _CheckNodeOnline(self.lu, node)
9226

    
9227
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9228
                                                          self.other_node,
9229
                                                          self.target_node]
9230
                              if node_name is not None)
9231

    
9232
    # Release unneeded node locks
9233
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9234

    
9235
    # Release any owned node group
9236
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9237
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9238

    
9239
    # Check whether disks are valid
9240
    for disk_idx in self.disks:
9241
      instance.FindDisk(disk_idx)
9242

    
9243
    # Get secondary node IP addresses
9244
    self.node_secondary_ip = \
9245
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
9246
           for node_name in touched_nodes)
9247

    
9248
  def Exec(self, feedback_fn):
9249
    """Execute disk replacement.
9250

9251
    This dispatches the disk replacement to the appropriate handler.
9252

9253
    """
9254
    if self.delay_iallocator:
9255
      self._CheckPrereq2()
9256

    
9257
    if __debug__:
9258
      # Verify owned locks before starting operation
9259
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9260
      assert set(owned_locks) == set(self.node_secondary_ip), \
9261
          ("Incorrect node locks, owning %s, expected %s" %
9262
           (owned_locks, self.node_secondary_ip.keys()))
9263

    
9264
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9265
      assert list(owned_locks) == [self.instance_name], \
9266
          "Instance '%s' not locked" % self.instance_name
9267

    
9268
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9269
          "Should not own any node group lock at this point"
9270

    
9271
    if not self.disks:
9272
      feedback_fn("No disks need replacement")
9273
      return
9274

    
9275
    feedback_fn("Replacing disk(s) %s for %s" %
9276
                (utils.CommaJoin(self.disks), self.instance.name))
9277

    
9278
    activate_disks = (not self.instance.admin_up)
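    # remember whether we have to activate the disks ourselves, so that
    # they can be shut down again in the "finally" clause below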
9279

    
9280
    # Activate the instance disks if we're replacing them on a down instance
9281
    if activate_disks:
9282
      _StartInstanceDisks(self.lu, self.instance, True)
9283

    
9284
    try:
9285
      # Should we replace the secondary node?
9286
      if self.new_node is not None:
9287
        fn = self._ExecDrbd8Secondary
9288
      else:
9289
        fn = self._ExecDrbd8DiskOnly
9290

    
9291
      result = fn(feedback_fn)
9292
    finally:
9293
      # Deactivate the instance disks if we're replacing them on a
9294
      # down instance
9295
      if activate_disks:
9296
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9297

    
9298
    if __debug__:
9299
      # Verify owned locks
9300
      owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9301
      nodes = frozenset(self.node_secondary_ip)
9302
      assert ((self.early_release and not owned_locks) or
9303
              (not self.early_release and not (set(owned_locks) - nodes))), \
9304
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9305
         " nodes=%r" % (self.early_release, owned_locks, nodes))
9306

    
9307
    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
9327
    # Check disk existence
9328
    for idx, dev in enumerate(self.instance.disks):
9329
      if idx not in self.disks:
9330
        continue
9331

    
9332
      for node in nodes:
9333
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9334
        self.cfg.SetDiskID(dev, node)
9335

    
9336
        result = self.rpc.call_blockdev_find(node, dev)
9337

    
9338
        msg = result.fail_msg
9339
        if msg or not result.payload:
9340
          if not msg:
9341
            msg = "disk not found"
9342
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9343
                                   (idx, node, msg))
9344

    
9345
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9346
    for idx, dev in enumerate(self.instance.disks):
9347
      if idx not in self.disks:
9348
        continue
9349

    
9350
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9351
                      (idx, node_name))
9352

    
9353
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9354
                                   ldisk=ldisk):
9355
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9356
                                 " replace disks for instance %s" %
9357
                                 (node_name, self.instance.name))
9358

    
9359
  def _CreateNewStorage(self, node_name):
9360
    """Create new storage on the primary or secondary node.
9361

9362
    This is only used for same-node replaces, not for changing the
9363
    secondary node, hence we don't want to modify the existing disk.
9364

9365
    """
9366
    iv_names = {}
9367

    
9368
    for idx, dev in enumerate(self.instance.disks):
9369
      if idx not in self.disks:
9370
        continue
9371

    
9372
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9373

    
9374
      self.cfg.SetDiskID(dev, node_name)
9375

    
9376
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9377
      names = _GenerateUniqueNames(self.lu, lv_names)
9378

    
9379
      vg_data = dev.children[0].logical_id[0]
9380
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9381
                             logical_id=(vg_data, names[0]))
9382
      vg_meta = dev.children[1].logical_id[0]
9383
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9384
                             logical_id=(vg_meta, names[1]))
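      # (the 128 MiB above is the size used for the DRBD metadata volume)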
9385

    
9386
      new_lvs = [lv_data, lv_meta]
9387
      old_lvs = [child.Copy() for child in dev.children]
9388
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9389

    
9390
      # we pass force_create=True to force the LVM creation
9391
      for new_lv in new_lvs:
9392
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9393
                        _GetInstanceInfoText(self.instance), False)
9394

    
9395
    return iv_names
9396

    
9397
  def _CheckDevices(self, node_name, iv_names):
9398
    for name, (dev, _, _) in iv_names.iteritems():
9399
      self.cfg.SetDiskID(dev, node_name)
9400

    
9401
      result = self.rpc.call_blockdev_find(node_name, dev)
9402

    
9403
      msg = result.fail_msg
9404
      if msg or not result.payload:
9405
        if not msg:
9406
          msg = "disk not found"
9407
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9408
                                 (name, msg))
9409

    
9410
      if result.payload.is_degraded:
9411
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9412

    
9413
  def _RemoveOldStorage(self, node_name, iv_names):
9414
    for name, (_, old_lvs, _) in iv_names.iteritems():
9415
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9416

    
9417
      for lv in old_lvs:
9418
        self.cfg.SetDiskID(lv, node_name)
9419

    
9420
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9421
        if msg:
9422
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9423
                             hint="remove unused LVs manually")
9424

    
9425
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9426
    """Replace a disk on the primary or secondary for DRBD 8.
9427

9428
    The algorithm for replace is quite complicated:
9429

9430
      1. for each disk to be replaced:
9431

9432
        1. create new LVs on the target node with unique names
9433
        1. detach old LVs from the drbd device
9434
        1. rename old LVs to name_replaced.<time_t>
9435
        1. rename new LVs to old LVs
9436
        1. attach the new LVs (with the old names now) to the drbd device
9437

9438
      1. wait for sync across all devices
9439

9440
      1. for each modified disk:
9441

9442
        1. remove old LVs (which have the name name_replaces.<time_t>)
9443

9444
    Failures are not very well handled.
9445

9446
    """
9447
    steps_total = 6
9448

    
9449
    # Step: check device activation
9450
    self.lu.LogStep(1, steps_total, "Check device existence")
9451
    self._CheckDisksExistence([self.other_node, self.target_node])
9452
    self._CheckVolumeGroup([self.target_node, self.other_node])
9453

    
9454
    # Step: check other node consistency
9455
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9456
    self._CheckDisksConsistency(self.other_node,
9457
                                self.other_node == self.instance.primary_node,
9458
                                False)
9459

    
9460
    # Step: create new storage
9461
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9462
    iv_names = self._CreateNewStorage(self.target_node)
9463

    
9464
    # Step: for each lv, detach+rename*2+attach
9465
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9466
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9467
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9468

    
9469
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9470
                                                     old_lvs)
9471
      result.Raise("Can't detach drbd from local storage on node"
9472
                   " %s for device %s" % (self.target_node, dev.iv_name))
9473
      #dev.children = []
9474
      #cfg.Update(instance)
9475

    
9476
      # ok, we created the new LVs, so now we know we have the needed
9477
      # storage; as such, we proceed on the target node to rename
9478
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9479
      # using the assumption that logical_id == physical_id (which in
9480
      # turn is the unique_id on that node)
9481

    
9482
      # FIXME(iustin): use a better name for the replaced LVs
9483
      temp_suffix = int(time.time())
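      # the timestamp suffix keeps the names of the replaced LVs unique, so
      # they can be removed later without clashing with the new LVs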
9484
      ren_fn = lambda d, suff: (d.physical_id[0],
9485
                                d.physical_id[1] + "_replaced-%s" % suff)
9486

    
9487
      # Build the rename list based on what LVs exist on the node
9488
      rename_old_to_new = []
9489
      for to_ren in old_lvs:
9490
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9491
        if not result.fail_msg and result.payload:
9492
          # device exists
9493
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9494

    
9495
      self.lu.LogInfo("Renaming the old LVs on the target node")
9496
      result = self.rpc.call_blockdev_rename(self.target_node,
9497
                                             rename_old_to_new)
9498
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9499

    
9500
      # Now we rename the new LVs to the old LVs
9501
      self.lu.LogInfo("Renaming the new LVs on the target node")
9502
      rename_new_to_old = [(new, old.physical_id)
9503
                           for old, new in zip(old_lvs, new_lvs)]
9504
      result = self.rpc.call_blockdev_rename(self.target_node,
9505
                                             rename_new_to_old)
9506
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9507

    
9508
      # Intermediate steps of in memory modifications
9509
      for old, new in zip(old_lvs, new_lvs):
9510
        new.logical_id = old.logical_id
9511
        self.cfg.SetDiskID(new, self.target_node)
9512

    
9513
      # We need to modify old_lvs so that removal later removes the
9514
      # right LVs, not the newly added ones; note that old_lvs is a
9515
      # copy here
9516
      for disk in old_lvs:
9517
        disk.logical_id = ren_fn(disk, temp_suffix)
9518
        self.cfg.SetDiskID(disk, self.target_node)
9519

    
9520
      # Now that the new lvs have the old name, we can add them to the device
9521
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9522
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9523
                                                  new_lvs)
9524
      msg = result.fail_msg
9525
      if msg:
9526
        for new_lv in new_lvs:
9527
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9528
                                               new_lv).fail_msg
9529
          if msg2:
9530
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
9533
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9534

    
9535
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
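    # One DRBD minor is allocated per instance disk, all of them on the new
    # secondary node (the primary keeps its existing minors).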
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
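      # iv_names maps disk index -> (old drbd disk, its local LV children,
      # the new network-enabled logical_id); _RemoveOldStorage and
      # _CheckDevices below consume this mapping.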
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

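    # With the old secondary's devices down, force the primary into
    # standalone (disconnected) mode so the later attach can re-point it at
    # the new secondary - the "artifice" described in the docstring above.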
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
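    # The instance, node group and node locks were declared optimistically in
    # DeclareLocks; verify here that none of those sets changed between lock
    # declaration and acquisition.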
    # Verify locks
    owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

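    # Note that jobs is a list of lists of opcodes: each instance gets its own
    # single-opcode job, so the evacuations can run as independent jobs.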
    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
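  # alloc_result is expected to be a (moved, failed, jobs) triple: moved is a
  # list of (name, group, nodes) tuples, failed a list of (name, reason)
  # tuples and jobs a list of lists of serialized opcodes.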
  (moved, failed, jobs) = alloc_result

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

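    # The grow is attempted twice on every node: first in dry-run mode (the
    # trailing True argument below), so that a failure on any node leaves all
    # disks untouched, and only then for real.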
    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
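    # For DRBD devices the first two entries of logical_id are the two node
    # names (see the 6-tuple unpacked in the replace-disks code above), so
    # whichever of them is not the primary is taken as the secondary to query.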
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

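    # self.op.disks is a list of (op, params) pairs, where op is either
    # constants.DDM_ADD, constants.DDM_REMOVE or the index of an existing
    # disk; e.g. a single add would look roughly like
    # [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})].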
    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

10344
    nic_addremove = 0
10345
    for nic_op, nic_dict in self.op.nics:
10346
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10347
      if nic_op == constants.DDM_REMOVE:
10348
        nic_addremove += 1
10349
        continue
10350
      elif nic_op == constants.DDM_ADD:
10351
        nic_addremove += 1
10352
      else:
10353
        if not isinstance(nic_op, int):
10354
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10355
        if not isinstance(nic_dict, dict):
10356
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10357
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10358

    
10359
      # nic_dict should be a dict
10360
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10361
      if nic_ip is not None:
10362
        if nic_ip.lower() == constants.VALUE_NONE:
10363
          nic_dict[constants.INIC_IP] = None
10364
        else:
10365
          if not netutils.IPAddress.IsValid(nic_ip):
10366
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10367
                                       errors.ECODE_INVAL)
10368

    
10369
      nic_bridge = nic_dict.get('bridge', None)
10370
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10371
      if nic_bridge and nic_link:
10372
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10373
                                   " at the same time", errors.ECODE_INVAL)
10374
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10375
        nic_dict['bridge'] = None
10376
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10377
        nic_dict[constants.INIC_LINK] = None
10378

    
10379
      if nic_op == constants.DDM_ADD:
10380
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10381
        if nic_mac is None:
10382
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10383

    
10384
      if constants.INIC_MAC in nic_dict:
10385
        nic_mac = nic_dict[constants.INIC_MAC]
10386
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10387
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10388

    
10389
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10390
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10391
                                     " modifying an existing nic",
10392
                                     errors.ECODE_INVAL)
10393

    
10394
    if nic_addremove > 1:
10395
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10396
                                 " supported at a time", errors.ECODE_INVAL)
10397

    
10398
  def ExpandNames(self):
10399
    self._ExpandAndLockInstance()
10400
    self.needed_locks[locking.LEVEL_NODE] = []
10401
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10402

    
10403
  def DeclareLocks(self, level):
10404
    if level == locking.LEVEL_NODE:
10405
      self._LockInstancesNodes()
10406
      if self.op.disk_template and self.op.remote_node:
10407
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10408
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10409

    
10410
  def BuildHooksEnv(self):
10411
    """Build hooks env.
10412

10413
    This runs on the master, primary and secondaries.
10414

10415
    """
10416
    args = dict()
10417
    if constants.BE_MEMORY in self.be_new:
10418
      args['memory'] = self.be_new[constants.BE_MEMORY]
10419
    if constants.BE_VCPUS in self.be_new:
10420
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
10421
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10422
    # information at all.
10423
    if self.op.nics:
10424
      args['nics'] = []
10425
      nic_override = dict(self.op.nics)
10426
      for idx, nic in enumerate(self.instance.nics):
10427
        if idx in nic_override:
10428
          this_nic_override = nic_override[idx]
10429
        else:
10430
          this_nic_override = {}
10431
        if constants.INIC_IP in this_nic_override:
10432
          ip = this_nic_override[constants.INIC_IP]
10433
        else:
10434
          ip = nic.ip
10435
        if constants.INIC_MAC in this_nic_override:
10436
          mac = this_nic_override[constants.INIC_MAC]
10437
        else:
10438
          mac = nic.mac
10439
        if idx in self.nic_pnew:
10440
          nicparams = self.nic_pnew[idx]
10441
        else:
10442
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10443
        mode = nicparams[constants.NIC_MODE]
10444
        link = nicparams[constants.NIC_LINK]
10445
        args['nics'].append((ip, mac, mode, link))
10446
      if constants.DDM_ADD in nic_override:
10447
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10448
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10449
        nicparams = self.nic_pnew[constants.DDM_ADD]
10450
        mode = nicparams[constants.NIC_MODE]
10451
        link = nicparams[constants.NIC_LINK]
10452
        args['nics'].append((ip, mac, mode, link))
10453
      elif constants.DDM_REMOVE in nic_override:
10454
        del args['nics'][-1]
10455

    
10456
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10457
    if self.op.disk_template:
10458
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10459

    
10460
    return env
10461

    
10462
  def BuildHooksNodes(self):
10463
    """Build hooks nodes.
10464

10465
    """
10466
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10467
    return (nl, nl)
10468

    
10469
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
    be_old = cluster.FillBE(instance)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode,  msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
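        # missing memory = requested memory - memory already used by this
        # instance - memory currently free on the primary; a positive value
        # means the instance could not be restarted with the new size.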
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get('memory_free', None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

    
10714
  def _ConvertPlainToDrbd(self, feedback_fn):
10715
    """Converts an instance from plain to drbd.
10716

10717
    """
10718
    feedback_fn("Converting template to drbd")
10719
    instance = self.instance
10720
    pnode = instance.primary_node
10721
    snode = self.op.remote_node
10722

    
10723
    # create a fake disk info for _GenerateDiskTemplate
10724
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10725
                  constants.IDISK_VG: d.logical_id[0]}
10726
                 for d in instance.disks]
10727
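    # _GenerateDiskTemplate is expected to return one DRBD8 disk per entry in
    # disk_info; by convention its children[0] is the data LV and children[1]
    # the metadata LV, which is what the creation and rename steps below rely
    # on.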
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
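    # The data LV (children[0]) of each DRBD disk simply becomes the plain
    # disk; the metadata LV (children[1]) and the secondary's copies are
    # removed further down.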
    new_disks = [d.children[0] for d in old_disks]
10778

    
10779
    # copy over size and mode
10780
    for parent, child in zip(old_disks, new_disks):
10781
      child.size = parent.size
10782
      child.mode = parent.mode
10783

    
10784
    # update instance structure
10785
    instance.disks = new_disks
10786
    instance.disk_template = constants.DT_PLAIN
10787
    self.cfg.Update(instance, feedback_fn)
10788

    
10789
    feedback_fn("Removing volumes on the secondary node...")
10790
    for disk in old_disks:
10791
      self.cfg.SetDiskID(disk, snode)
10792
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10793
      if msg:
10794
        self.LogWarning("Could not remove block device %s on node %s,"
10795
                        " continuing anyway: %s", disk.iv_name, snode, msg)
10796

    
10797
    feedback_fn("Removing unneeded volumes on the primary node...")
10798
    for idx, disk in enumerate(old_disks):
10799
      meta = disk.children[1]
10800
      self.cfg.SetDiskID(meta, pnode)
10801
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10802
      if msg:
10803
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
10804
                        " continuing anyway: %s", idx, pnode, msg)
10805

    
  def Exec(self, feedback_fn):
10807
    """Modifies an instance.
10808

10809
    All parameters take effect only at the next restart of the instance.
10810

10811
    """
10812
    # Process here the warnings from CheckPrereq, as we don't have a
10813
    # feedback_fn there.
10814
    for warn in self.warn:
10815
      feedback_fn("WARNING: %s" % warn)
10816

    
10817
    result = []
10818
    instance = self.instance
10819
    # disk changes
10820
    for disk_op, disk_dict in self.op.disks:
10821
      if disk_op == constants.DDM_REMOVE:
10822
        # remove the last disk
10823
        device = instance.disks.pop()
10824
        device_idx = len(instance.disks)
10825
        for node, disk in device.ComputeNodeTree(instance.primary_node):
10826
          self.cfg.SetDiskID(disk, node)
10827
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10828
          if msg:
10829
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
10830
                            " continuing anyway", device_idx, node, msg)
10831
        result.append(("disk/%d" % device_idx, "remove"))
10832
      elif disk_op == constants.DDM_ADD:
10833
        # add a new disk
10834
        if instance.disk_template in (constants.DT_FILE,
10835
                                        constants.DT_SHARED_FILE):
10836
          file_driver, file_path = instance.disks[0].logical_id
10837
          file_path = os.path.dirname(file_path)
10838
        else:
10839
          file_driver = file_path = None
10840
        disk_idx_base = len(instance.disks)
10841
        new_disk = _GenerateDiskTemplate(self,
10842
                                         instance.disk_template,
10843
                                         instance.name, instance.primary_node,
10844
                                         instance.secondary_nodes,
10845
                                         [disk_dict],
10846
                                         file_path,
10847
                                         file_driver,
10848
                                         disk_idx_base, feedback_fn)[0]
10849
        instance.disks.append(new_disk)
10850
        info = _GetInstanceInfoText(instance)
10851

    
10852
        logging.info("Creating volume %s for instance %s",
10853
                     new_disk.iv_name, instance.name)
10854
        # Note: this needs to be kept in sync with _CreateDisks
10855
        #HARDCODE
10856
        for node in instance.all_nodes:
10857
          f_create = node == instance.primary_node
10858
          try:
10859
            _CreateBlockDev(self, node, instance, new_disk,
10860
                            f_create, info, f_create)
10861
          except errors.OpExecError, err:
10862
            self.LogWarning("Failed to create volume %s (%s) on"
10863
                            " node %s: %s",
10864
                            new_disk.iv_name, new_disk, node, err)
10865
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10866
                       (new_disk.size, new_disk.mode)))
10867
      else:
10868
        # change a given disk
10869
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10870
        result.append(("disk.mode/%d" % disk_op,
10871
                       disk_dict[constants.IDISK_MODE]))
10872

    
10873
    if self.op.disk_template:
10874
      r_shut = _ShutdownInstanceDisks(self, instance)
10875
      if not r_shut:
10876
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10877
                                 " proceed with disk template conversion")
10878
      mode = (instance.disk_template, self.op.disk_template)
10879
      try:
10880
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
10881
      except:
10882
        self.cfg.ReleaseDRBDMinors(instance.name)
10883
        raise
10884
      result.append(("disk_template", self.op.disk_template))
10885

    
10886
    # NIC changes
10887
    for nic_op, nic_dict in self.op.nics:
10888
      if nic_op == constants.DDM_REMOVE:
10889
        # remove the last nic
10890
        del instance.nics[-1]
10891
        result.append(("nic.%d" % len(instance.nics), "remove"))
10892
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict[constants.INIC_MAC]
10895
        ip = nic_dict.get(constants.INIC_IP, None)
10896
        nicparams = self.nic_pinst[constants.DDM_ADD]
10897
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10898
        instance.nics.append(new_nic)
10899
        result.append(("nic.%d" % (len(instance.nics) - 1),
10900
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
10901
                       (new_nic.mac, new_nic.ip,
10902
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10903
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10904
                       )))
10905
      else:
10906
        for key in (constants.INIC_MAC, constants.INIC_IP):
10907
          if key in nic_dict:
10908
            setattr(instance.nics[nic_op], key, nic_dict[key])
10909
        if nic_op in self.nic_pinst:
10910
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10911
        for key, val in nic_dict.iteritems():
10912
          result.append(("nic.%s/%d" % (key, nic_op), val))
10913

    
10914
    # hvparams changes
10915
    if self.op.hvparams:
10916
      instance.hvparams = self.hv_inst
10917
      for key, val in self.op.hvparams.iteritems():
10918
        result.append(("hv/%s" % key, val))
10919

    
10920
    # beparams changes
10921
    if self.op.beparams:
10922
      instance.beparams = self.be_inst
10923
      for key, val in self.op.beparams.iteritems():
10924
        result.append(("be/%s" % key, val))
10925

    
10926
    # OS change
10927
    if self.op.os_name:
10928
      instance.os = self.op.os_name
10929

    
10930
    # osparams changes
10931
    if self.op.osparams:
10932
      instance.osparams = self.os_inst
10933
      for key, val in self.op.osparams.iteritems():
10934
        result.append(("os/%s" % key, val))
10935

    
10936
    self.cfg.Update(instance, feedback_fn)
10937

    
10938
    return result
10939

    
10940
  _DISK_CONVERSIONS = {
10941
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10942
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10943
    }
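
# The conversion methods above are selected through the _DISK_CONVERSIONS
# dispatch table, keyed by the (old template, new template) pair.  The helper
# below is a hedged, self-contained sketch of the same pattern; its name and
# the template strings are illustrative only and not part of the Ganeti API.
def _ExampleTemplateConversionDispatch(old_template, new_template):
  """Illustrative only: looks up a conversion routine in a dispatch dict."""
  def _PlainToDrbd():
    return "would convert plain -> drbd"

  def _DrbdToPlain():
    return "would convert drbd -> plain"

  conversions = {
    ("plain", "drbd"): _PlainToDrbd,
    ("drbd", "plain"): _DrbdToPlain,
    }
  try:
    return conversions[(old_template, new_template)]()
  except KeyError:
    raise ValueError("Unsupported conversion %s -> %s" %
                     (old_template, new_template))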
10944

    
10945

    
10946
class LUBackupQuery(NoHooksLU):
10947
  """Query the exports list
10948

10949
  """
10950
  REQ_BGL = False
10951

    
10952
  def ExpandNames(self):
10953
    self.needed_locks = {}
10954
    self.share_locks[locking.LEVEL_NODE] = 1
10955
    if not self.op.nodes:
10956
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10957
    else:
10958
      self.needed_locks[locking.LEVEL_NODE] = \
10959
        _GetWantedNodes(self, self.op.nodes)
10960

    
10961
  def Exec(self, feedback_fn):
10962
    """Compute the list of all the exported system images.
10963

10964
    @rtype: dict
10965
    @return: a dictionary with the structure node->(export-list)
10966
        where export-list is a list of the instances exported on
10967
        that node.
10968

10969
    """
10970
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10971
    rpcresult = self.rpc.call_export_list(self.nodes)
10972
    result = {}
10973
    for node in rpcresult:
10974
      if rpcresult[node].fail_msg:
10975
        result[node] = False
10976
      else:
10977
        result[node] = rpcresult[node].payload
10978

    
10979
    return result
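
# Hedged sketch of how a caller might consume the mapping returned above: each
# node name maps either to False (the export list RPC failed) or to the list
# of exports found on that node.  The helper and sample data are illustrative
# only, not part of the Ganeti API.
def _ExampleSplitExportResults(export_results):
  """Separates usable export lists from nodes whose query failed."""
  good = dict((node, exports) for (node, exports) in export_results.items()
              if exports is not False)
  failed = [node for (node, exports) in export_results.items()
            if exports is False]
  return (good, failed)

# _ExampleSplitExportResults({"node1": ["inst1"], "node2": False})
# returns ({"node1": ["inst1"]}, ["node2"])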
10980

    
10981

    
10982
class LUBackupPrepare(NoHooksLU):
10983
  """Prepares an instance for an export and returns useful information.
10984

10985
  """
10986
  REQ_BGL = False
10987

    
10988
  def ExpandNames(self):
10989
    self._ExpandAndLockInstance()
10990

    
10991
  def CheckPrereq(self):
10992
    """Check prerequisites.
10993

10994
    """
10995
    instance_name = self.op.instance_name
10996

    
10997
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10998
    assert self.instance is not None, \
10999
          "Cannot retrieve locked instance %s" % self.op.instance_name
11000
    _CheckNodeOnline(self, self.instance.primary_node)
11001

    
11002
    self._cds = _GetClusterDomainSecret()
11003

    
11004
  def Exec(self, feedback_fn):
11005
    """Prepares an instance for an export.
11006

11007
    """
11008
    instance = self.instance
11009

    
11010
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11011
      salt = utils.GenerateSecret(8)
11012

    
11013
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11014
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11015
                                              constants.RIE_CERT_VALIDITY)
11016
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11017

    
11018
      (name, cert_pem) = result.payload
11019

    
11020
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11021
                                             cert_pem)
11022

    
11023
      return {
11024
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11025
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11026
                          salt),
11027
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11028
        }
11029

    
11030
    return None
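
# Hedged sketch of the (key_name, HMAC, salt) triple built for remote exports
# above.  It uses only the standard library to illustrate the idea behind
# utils.Sha1Hmac/utils.VerifySha1Hmac; it does not claim to reproduce their
# exact message format.
import hashlib as _example_hashlib
import hmac as _example_hmac

def _ExampleSignKeyName(cluster_domain_secret, key_name, salt):
  """Returns a salted SHA-1 HMAC over an X509 key name (illustration only)."""
  return _example_hmac.new(cluster_domain_secret, salt + key_name,
                           _example_hashlib.sha1).hexdigest()

# The destination recomputes the digest from the shared cluster domain secret
# and compares it before trusting the key name it received.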
11031

    
11032

    
11033
class LUBackupExport(LogicalUnit):
11034
  """Export an instance to an image in the cluster.
11035

11036
  """
11037
  HPATH = "instance-export"
11038
  HTYPE = constants.HTYPE_INSTANCE
11039
  REQ_BGL = False
11040

    
11041
  def CheckArguments(self):
11042
    """Check the arguments.
11043

11044
    """
11045
    self.x509_key_name = self.op.x509_key_name
11046
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11047

    
11048
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11049
      if not self.x509_key_name:
11050
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11051
                                   errors.ECODE_INVAL)
11052

    
11053
      if not self.dest_x509_ca_pem:
11054
        raise errors.OpPrereqError("Missing destination X509 CA",
11055
                                   errors.ECODE_INVAL)
11056

    
11057
  def ExpandNames(self):
11058
    self._ExpandAndLockInstance()
11059

    
11060
    # Lock all nodes for local exports
11061
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11062
      # FIXME: lock only instance primary and destination node
11063
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
11066
      # remove it from its current node. In the future we could fix this by:
11067
      #  - making a tasklet to search (share-lock all), then create the
11068
      #    new one, then one to remove, after
11069
      #  - removing the removal operation altogether
11070
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11071

    
11072
  def DeclareLocks(self, level):
11073
    """Last minute lock declaration."""
11074
    # All nodes are locked anyway, so nothing to do here.
11075

    
11076
  def BuildHooksEnv(self):
11077
    """Build hooks env.
11078

11079
    This will run on the master, primary node and target node.
11080

11081
    """
11082
    env = {
11083
      "EXPORT_MODE": self.op.mode,
11084
      "EXPORT_NODE": self.op.target_node,
11085
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11086
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11087
      # TODO: Generic function for boolean env variables
11088
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11089
      }
11090

    
11091
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11092

    
11093
    return env
11094

    
11095
  def BuildHooksNodes(self):
11096
    """Build hooks nodes.
11097

11098
    """
11099
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11100

    
11101
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11102
      nl.append(self.op.target_node)
11103

    
11104
    return (nl, nl)
11105

    
11106
  def CheckPrereq(self):
11107
    """Check prerequisites.
11108

11109
    This checks that the instance and node names are valid.
11110

11111
    """
11112
    instance_name = self.op.instance_name
11113

    
11114
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11115
    assert self.instance is not None, \
11116
          "Cannot retrieve locked instance %s" % self.op.instance_name
11117
    _CheckNodeOnline(self, self.instance.primary_node)
11118

    
11119
    if (self.op.remove_instance and self.instance.admin_up and
11120
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11122
                                 " down before")

    
11124
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11125
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11126
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11127
      assert self.dst_node is not None
11128

    
11129
      _CheckNodeOnline(self, self.dst_node.name)
11130
      _CheckNodeNotDrained(self, self.dst_node.name)
11131

    
11132
      self._cds = None
11133
      self.dest_disk_info = None
11134
      self.dest_x509_ca = None
11135

    
11136
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11137
      self.dst_node = None
11138

    
11139
      if len(self.op.target_node) != len(self.instance.disks):
11140
        raise errors.OpPrereqError(("Received destination information for %s"
11141
                                    " disks, but instance %s has %s disks") %
11142
                                   (len(self.op.target_node), instance_name,
11143
                                    len(self.instance.disks)),
11144
                                   errors.ECODE_INVAL)
11145

    
11146
      cds = _GetClusterDomainSecret()
11147

    
11148
      # Check X509 key name
11149
      try:
11150
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11151
      except (TypeError, ValueError), err:
11152
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11153

    
11154
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11155
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11156
                                   errors.ECODE_INVAL)
11157

    
11158
      # Load and verify CA
11159
      try:
11160
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11161
      except OpenSSL.crypto.Error, err:
11162
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11163
                                   (err, ), errors.ECODE_INVAL)
11164

    
11165
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11166
      if errcode is not None:
11167
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11168
                                   (msg, ), errors.ECODE_INVAL)
11169

    
11170
      self.dest_x509_ca = cert
11171

    
11172
      # Verify target information
11173
      disk_info = []
11174
      for idx, disk_data in enumerate(self.op.target_node):
11175
        try:
11176
          (host, port, magic) = \
11177
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11178
        except errors.GenericError, err:
11179
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11180
                                     (idx, err), errors.ECODE_INVAL)
11181

    
11182
        disk_info.append((host, port, magic))
11183

    
11184
      assert len(disk_info) == len(self.op.target_node)
11185
      self.dest_disk_info = disk_info
11186

    
11187
    else:
11188
      raise errors.ProgrammerError("Unhandled export mode %r" %
11189
                                   self.op.mode)
11190

    
11191
    # instance disk type verification
11192
    # TODO: Implement export support for file-based disks
11193
    for disk in self.instance.disks:
11194
      if disk.dev_type == constants.LD_FILE:
11195
        raise errors.OpPrereqError("Export not supported for instances with"
11196
                                   " file-based disks", errors.ECODE_INVAL)
11197

    
11198
  def _CleanupExports(self, feedback_fn):
11199
    """Removes exports of current instance from all other nodes.
11200

11201
    If an instance in a cluster with nodes A..D was exported to node C, its
11202
    exports will be removed from the nodes A, B and D.
11203

11204
    """
11205
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11206

    
11207
    nodelist = self.cfg.GetNodeList()
11208
    nodelist.remove(self.dst_node.name)
11209

    
11210
    # on one-node clusters nodelist will be empty after the removal
    # if we proceeded, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
11213
    iname = self.instance.name
11214
    if nodelist:
11215
      feedback_fn("Removing old exports for instance %s" % iname)
11216
      exportlist = self.rpc.call_export_list(nodelist)
11217
      for node in exportlist:
11218
        if exportlist[node].fail_msg:
11219
          continue
11220
        if iname in exportlist[node].payload:
11221
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11222
          if msg:
11223
            self.LogWarning("Could not remove older export for instance %s"
11224
                            " on node %s: %s", iname, node, msg)
11225

    
11226
  def Exec(self, feedback_fn):
11227
    """Export an instance to an image in the cluster.
11228

11229
    """
11230
    assert self.op.mode in constants.EXPORT_MODES
11231

    
11232
    instance = self.instance
11233
    src_node = instance.primary_node
11234

    
11235
    if self.op.shutdown:
11236
      # shutdown the instance, but not the disks
11237
      feedback_fn("Shutting down instance %s" % instance.name)
11238
      result = self.rpc.call_instance_shutdown(src_node, instance,
11239
                                               self.op.shutdown_timeout)
11240
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11241
      result.Raise("Could not shutdown instance %s on"
11242
                   " node %s" % (instance.name, src_node))
11243

    
11244
    # set the disks ID correctly since call_instance_start needs the
11245
    # correct drbd minor to create the symlinks
11246
    for disk in instance.disks:
11247
      self.cfg.SetDiskID(disk, src_node)
11248

    
11249
    activate_disks = (not instance.admin_up)
11250

    
11251
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
11254
      _StartInstanceDisks(self, instance, None)
11255

    
11256
    try:
11257
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11258
                                                     instance)
11259

    
11260
      helper.CreateSnapshots()
11261
      try:
11262
        if (self.op.shutdown and instance.admin_up and
11263
            not self.op.remove_instance):
11264
          assert not activate_disks
11265
          feedback_fn("Starting instance %s" % instance.name)
11266
          result = self.rpc.call_instance_start(src_node, instance,
11267
                                                None, None, False)
11268
          msg = result.fail_msg
11269
          if msg:
11270
            feedback_fn("Failed to start instance: %s" % msg)
11271
            _ShutdownInstanceDisks(self, instance)
11272
            raise errors.OpExecError("Could not start instance: %s" % msg)
11273

    
11274
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11275
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11276
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11277
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11278
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11279

    
11280
          (key_name, _, _) = self.x509_key_name
11281

    
11282
          dest_ca_pem = \
11283
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11284
                                            self.dest_x509_ca)
11285

    
11286
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11287
                                                     key_name, dest_ca_pem,
11288
                                                     timeouts)
11289
      finally:
11290
        helper.Cleanup()
11291

    
11292
      # Check for backwards compatibility
11293
      assert len(dresults) == len(instance.disks)
11294
      assert compat.all(isinstance(i, bool) for i in dresults), \
11295
             "Not all results are boolean: %r" % dresults
11296

    
11297
    finally:
11298
      if activate_disks:
11299
        feedback_fn("Deactivating disks for %s" % instance.name)
11300
        _ShutdownInstanceDisks(self, instance)
11301

    
11302
    if not (compat.all(dresults) and fin_resu):
11303
      failures = []
11304
      if not fin_resu:
11305
        failures.append("export finalization")
11306
      if not compat.all(dresults):
11307
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11308
                               if not dsk)
11309
        failures.append("disk export: disk(s) %s" % fdsk)
11310

    
11311
      raise errors.OpExecError("Export failed, errors in %s" %
11312
                               utils.CommaJoin(failures))
11313

    
11314
    # At this point, the export was successful, we can cleanup/finish
11315

    
11316
    # Remove instance if requested
11317
    if self.op.remove_instance:
11318
      feedback_fn("Removing instance %s" % instance.name)
11319
      _RemoveInstance(self, feedback_fn, instance,
11320
                      self.op.ignore_remove_failures)
11321

    
11322
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11323
      self._CleanupExports(feedback_fn)
11324

    
11325
    return fin_resu, dresults
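
# Illustrative sketch: the (fin_resu, dresults) pair returned above carries one
# boolean for export finalization plus one boolean per disk.  Collapsing it
# into a single success flag and the list of failed disk indexes could look
# like this; the helper is hypothetical, not part of the Ganeti API.
def _ExampleSummarizeExport(fin_resu, dresults):
  """Returns (overall_success, failed_disk_indexes) for an export result."""
  failed_disks = [idx for (idx, ok) in enumerate(dresults) if not ok]
  return (bool(fin_resu) and not failed_disks, failed_disks)

# _ExampleSummarizeExport(True, [True, False]) returns (False, [1])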
11326

    
11327

    
11328
class LUBackupRemove(NoHooksLU):
11329
  """Remove exports related to the named instance.
11330

11331
  """
11332
  REQ_BGL = False
11333

    
11334
  def ExpandNames(self):
11335
    self.needed_locks = {}
11336
    # We need all nodes to be locked in order for RemoveExport to work, but we
11337
    # don't need to lock the instance itself, as nothing will happen to it (and
11338
    # we can remove exports also for a removed instance)
11339
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11340

    
11341
  def Exec(self, feedback_fn):
11342
    """Remove any export.
11343

11344
    """
11345
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11346
    # If the instance was not found we'll try with the name that was passed in.
11347
    # This will only work if it was an FQDN, though.
11348
    fqdn_warn = False
11349
    if not instance_name:
11350
      fqdn_warn = True
11351
      instance_name = self.op.instance_name
11352

    
11353
    locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11354
    exportlist = self.rpc.call_export_list(locked_nodes)
11355
    found = False
11356
    for node in exportlist:
11357
      msg = exportlist[node].fail_msg
11358
      if msg:
11359
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11360
        continue
11361
      if instance_name in exportlist[node].payload:
11362
        found = True
11363
        result = self.rpc.call_export_remove(node, instance_name)
11364
        msg = result.fail_msg
11365
        if msg:
11366
          logging.error("Could not remove export for instance %s"
11367
                        " on node %s: %s", instance_name, node, msg)
11368

    
11369
    if fqdn_warn and not found:
11370
      feedback_fn("Export not found. If trying to remove an export belonging"
11371
                  " to a deleted instance please use its Fully Qualified"
11372
                  " Domain Name.")
11373

    
11374

    
11375
class LUGroupAdd(LogicalUnit):
11376
  """Logical unit for creating node groups.
11377

11378
  """
11379
  HPATH = "group-add"
11380
  HTYPE = constants.HTYPE_GROUP
11381
  REQ_BGL = False
11382

    
11383
  def ExpandNames(self):
11384
    # We need the new group's UUID here so that we can create and acquire the
11385
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11386
    # that it should not check whether the UUID exists in the configuration.
11387
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11388
    self.needed_locks = {}
11389
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11390

    
11391
  def CheckPrereq(self):
11392
    """Check prerequisites.
11393

11394
    This checks that the given group name is not an existing node group
11395
    already.
11396

11397
    """
11398
    try:
11399
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11400
    except errors.OpPrereqError:
11401
      pass
11402
    else:
11403
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11404
                                 " node group (UUID: %s)" %
11405
                                 (self.op.group_name, existing_uuid),
11406
                                 errors.ECODE_EXISTS)
11407

    
11408
    if self.op.ndparams:
11409
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11410

    
11411
  def BuildHooksEnv(self):
11412
    """Build hooks env.
11413

11414
    """
11415
    return {
11416
      "GROUP_NAME": self.op.group_name,
11417
      }
11418

    
11419
  def BuildHooksNodes(self):
11420
    """Build hooks nodes.
11421

11422
    """
11423
    mn = self.cfg.GetMasterNode()
11424
    return ([mn], [mn])
11425

    
11426
  def Exec(self, feedback_fn):
11427
    """Add the node group to the cluster.
11428

11429
    """
11430
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11431
                                  uuid=self.group_uuid,
11432
                                  alloc_policy=self.op.alloc_policy,
11433
                                  ndparams=self.op.ndparams)
11434

    
11435
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11436
    del self.remove_locks[locking.LEVEL_NODEGROUP]
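
# The ExpandNames/Exec pair above follows a generic "reserve first" pattern:
# generate a fresh UUID, acquire the lock named after it, create the object
# with UUID checking disabled, then drop the cleanup marker once the object
# exists.  A self-contained sketch of that pattern with made-up callables:
def _ExampleAddWithReservedId(generate_id, acquire_lock, create_obj,
                              discard_cleanup):
  """Illustrative only: reserve an ID, lock it, create, then keep the lock."""
  new_id = generate_id()      # e.g. a UUID not yet present in the config
  acquire_lock(new_id)        # the lock is taken before the object exists
  create_obj(new_id)          # creation may skip the "UUID exists" check
  discard_cleanup(new_id)     # object exists, no automatic removal needed
  return new_id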
11437

    
11438

    
11439
class LUGroupAssignNodes(NoHooksLU):
11440
  """Logical unit for assigning nodes to groups.
11441

11442
  """
11443
  REQ_BGL = False
11444

    
11445
  def ExpandNames(self):
11446
    # These raise errors.OpPrereqError on their own:
11447
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11448
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11449

    
11450
    # We want to lock all the affected nodes and groups. We have readily
11451
    # available the list of nodes, and the *destination* group. To gather the
11452
    # list of "source" groups, we need to fetch node information later on.
11453
    self.needed_locks = {
11454
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11455
      locking.LEVEL_NODE: self.op.nodes,
11456
      }
11457

    
11458
  def DeclareLocks(self, level):
11459
    if level == locking.LEVEL_NODEGROUP:
11460
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11461

    
11462
      # Try to get all affected nodes' groups without having the group or node
11463
      # lock yet. Needs verification later in the code flow.
11464
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11465

    
11466
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11467

    
11468
  def CheckPrereq(self):
11469
    """Check prerequisites.
11470

11471
    """
11472
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11473
    assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11474
            frozenset(self.op.nodes))
11475

    
11476
    expected_locks = (set([self.group_uuid]) |
11477
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11478
    actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11479
    if actual_locks != expected_locks:
11480
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11481
                               " current groups are '%s', used to be '%s'" %
11482
                               (utils.CommaJoin(expected_locks),
11483
                                utils.CommaJoin(actual_locks)))
11484

    
11485
    self.node_data = self.cfg.GetAllNodesInfo()
11486
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11487
    instance_data = self.cfg.GetAllInstancesInfo()
11488

    
11489
    if self.group is None:
11490
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11491
                               (self.op.group_name, self.group_uuid))
11492

    
11493
    (new_splits, previous_splits) = \
11494
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11495
                                             for node in self.op.nodes],
11496
                                            self.node_data, instance_data)
11497

    
11498
    if new_splits:
11499
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11500

    
11501
      if not self.op.force:
11502
        raise errors.OpExecError("The following instances get split by this"
11503
                                 " change and --force was not given: %s" %
11504
                                 fmt_new_splits)
11505
      else:
11506
        self.LogWarning("This operation will split the following instances: %s",
11507
                        fmt_new_splits)
11508

    
11509
        if previous_splits:
11510
          self.LogWarning("In addition, these already-split instances continue"
11511
                          " to be split across groups: %s",
11512
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11513

    
11514
  def Exec(self, feedback_fn):
11515
    """Assign nodes to a new group.
11516

11517
    """
11518
    for node in self.op.nodes:
11519
      self.node_data[node].group = self.group_uuid
11520

    
11521
    # FIXME: Depends on side-effects of modifying the result of
11522
    # C{cfg.GetAllNodesInfo}
11523

    
11524
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11525

    
11526
  @staticmethod
11527
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11528
    """Check for split instances after a node assignment.
11529

11530
    This method considers a series of node assignments as an atomic operation,
11531
    and returns information about split instances after applying the set of
11532
    changes.
11533

11534
    In particular, it returns information about newly split instances, and
    about instances that were already split and remain so after the change.
11536

11537
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11538
    considered.
11539

11540
    @type changes: list of (node_name, new_group_uuid) pairs.
11541
    @param changes: list of node assignments to consider.
11542
    @param node_data: a dict with data for all nodes
11543
    @param instance_data: a dict with all instances to consider
11544
    @rtype: a two-tuple
11545
    @return: a list of instances that were previously okay and result split as a
11546
      consequence of this change, and a list of instances that were previously
11547
      split and this change does not fix.
11548

11549
    """
11550
    changed_nodes = dict((node, group) for node, group in changes
11551
                         if node_data[node].group != group)
11552

    
11553
    all_split_instances = set()
11554
    previously_split_instances = set()
11555

    
11556
    def InstanceNodes(instance):
11557
      return [instance.primary_node] + list(instance.secondary_nodes)
11558

    
11559
    for inst in instance_data.values():
11560
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11561
        continue
11562

    
11563
      instance_nodes = InstanceNodes(inst)
11564

    
11565
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11566
        previously_split_instances.add(inst.name)
11567

    
11568
      if len(set(changed_nodes.get(node, node_data[node].group)
11569
                 for node in instance_nodes)) > 1:
11570
        all_split_instances.add(inst.name)
11571

    
11572
    return (list(all_split_instances - previously_split_instances),
11573
            list(previously_split_instances & all_split_instances))
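
# Worked example (made-up nodes, groups and instances) of the computation
# above: "inst1" already spans groups g1/g2 and stays split, while moving
# "nodeC" into "g3" newly splits "inst2".  Purely illustrative.
def _ExampleSplitCheck():
  node_group = {"nodeA": "g1", "nodeB": "g2", "nodeC": "g1", "nodeD": "g1"}
  instances = {"inst1": ["nodeA", "nodeB"], "inst2": ["nodeC", "nodeD"]}
  changes = {"nodeC": "g3"}

  def _IsSplit(inst_nodes, mapping):
    return len(set(mapping[node] for node in inst_nodes)) > 1

  previously = set(name for (name, nodes) in instances.items()
                   if _IsSplit(nodes, node_group))
  new_mapping = dict(node_group, **changes)
  now = set(name for (name, nodes) in instances.items()
            if _IsSplit(nodes, new_mapping))
  return (sorted(now - previously), sorted(now & previously))

# _ExampleSplitCheck() returns (["inst2"], ["inst1"])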
11574

    
11575

    
11576
class _GroupQuery(_QueryBase):
11577
  FIELDS = query.GROUP_FIELDS
11578

    
11579
  def ExpandNames(self, lu):
11580
    lu.needed_locks = {}
11581

    
11582
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11583
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11584

    
11585
    if not self.names:
11586
      self.wanted = [name_to_uuid[name]
11587
                     for name in utils.NiceSort(name_to_uuid.keys())]
11588
    else:
11589
      # Accept names to be either names or UUIDs.
11590
      missing = []
11591
      self.wanted = []
11592
      all_uuid = frozenset(self._all_groups.keys())
11593

    
11594
      for name in self.names:
11595
        if name in all_uuid:
11596
          self.wanted.append(name)
11597
        elif name in name_to_uuid:
11598
          self.wanted.append(name_to_uuid[name])
11599
        else:
11600
          missing.append(name)
11601

    
11602
      if missing:
11603
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11604
                                   utils.CommaJoin(missing),
11605
                                   errors.ECODE_NOENT)
11606

    
11607
  def DeclareLocks(self, lu, level):
11608
    pass
11609

    
11610
  def _GetQueryData(self, lu):
11611
    """Computes the list of node groups and their attributes.
11612

11613
    """
11614
    do_nodes = query.GQ_NODE in self.requested_data
11615
    do_instances = query.GQ_INST in self.requested_data
11616

    
11617
    group_to_nodes = None
11618
    group_to_instances = None
11619

    
11620
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11621
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11622
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11623
    # instance->node. Hence, we will need to process nodes even if we only need
11624
    # instance information.
11625
    if do_nodes or do_instances:
11626
      all_nodes = lu.cfg.GetAllNodesInfo()
11627
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11628
      node_to_group = {}
11629

    
11630
      for node in all_nodes.values():
11631
        if node.group in group_to_nodes:
11632
          group_to_nodes[node.group].append(node.name)
11633
          node_to_group[node.name] = node.group
11634

    
11635
      if do_instances:
11636
        all_instances = lu.cfg.GetAllInstancesInfo()
11637
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11638

    
11639
        for instance in all_instances.values():
11640
          node = instance.primary_node
11641
          if node in node_to_group:
11642
            group_to_instances[node_to_group[node]].append(instance.name)
11643

    
11644
        if not do_nodes:
11645
          # Do not pass on node information if it was not requested.
11646
          group_to_nodes = None
11647

    
11648
    return query.GroupQueryData([self._all_groups[uuid]
11649
                                 for uuid in self.wanted],
11650
                                group_to_nodes, group_to_instances)
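
# Self-contained sketch (made-up data) of the two reverse mappings built in
# _GetQueryData above: group -> [nodes] comes straight from the node list,
# while group -> [instances] goes through each instance's primary node.
def _ExampleGroupMappings():
  node_to_group = {"node1": "uuid-a", "node2": "uuid-b", "node3": "uuid-a"}
  instance_to_pnode = {"inst1": "node1", "inst2": "node3"}

  group_to_nodes = {}
  for node, group in node_to_group.items():
    group_to_nodes.setdefault(group, []).append(node)

  group_to_instances = {}
  for inst, pnode in instance_to_pnode.items():
    group_to_instances.setdefault(node_to_group[pnode], []).append(inst)

  return (group_to_nodes, group_to_instances)

# Both "inst1" and "inst2" end up under "uuid-a", because their primary nodes
# ("node1" and "node3") belong to that group.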
11651

    
11652

    
11653
class LUGroupQuery(NoHooksLU):
11654
  """Logical unit for querying node groups.
11655

11656
  """
11657
  REQ_BGL = False
11658

    
11659
  def CheckArguments(self):
11660
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11661
                          self.op.output_fields, False)
11662

    
11663
  def ExpandNames(self):
11664
    self.gq.ExpandNames(self)
11665

    
11666
  def Exec(self, feedback_fn):
11667
    return self.gq.OldStyleQuery(self)
11668

    
11669

    
11670
class LUGroupSetParams(LogicalUnit):
11671
  """Modifies the parameters of a node group.
11672

11673
  """
11674
  HPATH = "group-modify"
11675
  HTYPE = constants.HTYPE_GROUP
11676
  REQ_BGL = False
11677

    
11678
  def CheckArguments(self):
11679
    all_changes = [
11680
      self.op.ndparams,
11681
      self.op.alloc_policy,
11682
      ]
11683

    
11684
    if all_changes.count(None) == len(all_changes):
11685
      raise errors.OpPrereqError("Please pass at least one modification",
11686
                                 errors.ECODE_INVAL)
11687

    
11688
  def ExpandNames(self):
11689
    # This raises errors.OpPrereqError on its own:
11690
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11691

    
11692
    self.needed_locks = {
11693
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11694
      }
11695

    
11696
  def CheckPrereq(self):
11697
    """Check prerequisites.
11698

11699
    """
11700
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11701

    
11702
    if self.group is None:
11703
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11704
                               (self.op.group_name, self.group_uuid))
11705

    
11706
    if self.op.ndparams:
11707
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11708
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11709
      self.new_ndparams = new_ndparams
11710

    
11711
  def BuildHooksEnv(self):
11712
    """Build hooks env.
11713

11714
    """
11715
    return {
11716
      "GROUP_NAME": self.op.group_name,
11717
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
11718
      }
11719

    
11720
  def BuildHooksNodes(self):
11721
    """Build hooks nodes.
11722

11723
    """
11724
    mn = self.cfg.GetMasterNode()
11725
    return ([mn], [mn])
11726

    
11727
  def Exec(self, feedback_fn):
11728
    """Modifies the node group.
11729

11730
    """
11731
    result = []
11732

    
11733
    if self.op.ndparams:
11734
      self.group.ndparams = self.new_ndparams
11735
      result.append(("ndparams", str(self.group.ndparams)))
11736

    
11737
    if self.op.alloc_policy:
11738
      self.group.alloc_policy = self.op.alloc_policy
11739

    
11740
    self.cfg.Update(self.group, feedback_fn)
11741
    return result
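
# Hedged sketch of the parameter update applied in CheckPrereq above: the
# group's current ndparams are combined with the requested overrides before
# being type-checked and stored.  The helper below shows the general idea with
# plain dicts and made-up keys; it is not the actual _GetUpdatedParams
# implementation, which also knows how to reset values to their defaults.
def _ExampleMergeNdParams(current, overrides):
  """Returns a new dict with overrides applied on top of current values."""
  merged = dict(current)
  merged.update(overrides)
  return merged

# _ExampleMergeNdParams({"param_a": 1, "param_b": "x"}, {"param_b": "y"})
# returns {"param_a": 1, "param_b": "y"}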
11742

    
11743

    
11744

    
11745
class LUGroupRemove(LogicalUnit):
11746
  HPATH = "group-remove"
11747
  HTYPE = constants.HTYPE_GROUP
11748
  REQ_BGL = False
11749

    
11750
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11753
    self.needed_locks = {
11754
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11755
      }
11756

    
11757
  def CheckPrereq(self):
11758
    """Check prerequisites.
11759

11760
    This checks that the given group name exists as a node group, that is
11761
    empty (i.e., contains no nodes), and that is not the last group of the
11762
    cluster.
11763

11764
    """
11765
    # Verify that the group is empty.
11766
    group_nodes = [node.name
11767
                   for node in self.cfg.GetAllNodesInfo().values()
11768
                   if node.group == self.group_uuid]
11769

    
11770
    if group_nodes:
11771
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
11772
                                 " nodes: %s" %
11773
                                 (self.op.group_name,
11774
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
11775
                                 errors.ECODE_STATE)
11776

    
11777
    # Verify the cluster would not be left group-less.
11778
    if len(self.cfg.GetNodeGroupList()) == 1:
11779
      raise errors.OpPrereqError("Group '%s' is the only group,"
11780
                                 " cannot be removed" %
11781
                                 self.op.group_name,
11782
                                 errors.ECODE_STATE)
11783

    
11784
  def BuildHooksEnv(self):
11785
    """Build hooks env.
11786

11787
    """
11788
    return {
11789
      "GROUP_NAME": self.op.group_name,
11790
      }
11791

    
11792
  def BuildHooksNodes(self):
11793
    """Build hooks nodes.
11794

11795
    """
11796
    mn = self.cfg.GetMasterNode()
11797
    return ([mn], [mn])
11798

    
11799
  def Exec(self, feedback_fn):
11800
    """Remove the node group.
11801

11802
    """
11803
    try:
11804
      self.cfg.RemoveNodeGroup(self.group_uuid)
11805
    except errors.ConfigurationError:
11806
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11807
                               (self.op.group_name, self.group_uuid))
11808

    
11809
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11810

    
11811

    
11812
class LUGroupRename(LogicalUnit):
11813
  HPATH = "group-rename"
11814
  HTYPE = constants.HTYPE_GROUP
11815
  REQ_BGL = False
11816

    
11817
  def ExpandNames(self):
11818
    # This raises errors.OpPrereqError on its own:
11819
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11820

    
11821
    self.needed_locks = {
11822
      locking.LEVEL_NODEGROUP: [self.group_uuid],
11823
      }
11824

    
11825
  def CheckPrereq(self):
11826
    """Check prerequisites.
11827

11828
    Ensures requested new name is not yet used.
11829

11830
    """
11831
    try:
11832
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11833
    except errors.OpPrereqError:
11834
      pass
11835
    else:
11836
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11837
                                 " node group (UUID: %s)" %
11838
                                 (self.op.new_name, new_name_uuid),
11839
                                 errors.ECODE_EXISTS)
11840

    
11841
  def BuildHooksEnv(self):
11842
    """Build hooks env.
11843

11844
    """
11845
    return {
11846
      "OLD_NAME": self.op.group_name,
11847
      "NEW_NAME": self.op.new_name,
11848
      }
11849

    
11850
  def BuildHooksNodes(self):
11851
    """Build hooks nodes.
11852

11853
    """
11854
    mn = self.cfg.GetMasterNode()
11855

    
11856
    all_nodes = self.cfg.GetAllNodesInfo()
11857
    all_nodes.pop(mn, None)
11858

    
11859
    run_nodes = [mn]
11860
    run_nodes.extend(node.name for node in all_nodes.values()
11861
                     if node.group == self.group_uuid)
11862

    
11863
    return (run_nodes, run_nodes)
11864

    
11865
  def Exec(self, feedback_fn):
11866
    """Rename the node group.
11867

11868
    """
11869
    group = self.cfg.GetNodeGroup(self.group_uuid)
11870

    
11871
    if group is None:
11872
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11873
                               (self.op.group_name, self.group_uuid))
11874

    
11875
    group.name = self.op.new_name
11876
    self.cfg.Update(group, feedback_fn)
11877

    
11878
    return self.op.new_name
11879

    
11880

    
11881
class LUGroupEvacuate(LogicalUnit):
11882
  HPATH = "group-evacuate"
11883
  HTYPE = constants.HTYPE_GROUP
11884
  REQ_BGL = False
11885

    
11886
  def ExpandNames(self):
11887
    # This raises errors.OpPrereqError on its own:
11888
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11889

    
11890
    if self.op.target_groups:
11891
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11892
                                  self.op.target_groups)
11893
    else:
11894
      self.req_target_uuids = []
11895

    
11896
    if self.group_uuid in self.req_target_uuids:
11897
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
11898
                                 " as a target group (targets are %s)" %
11899
                                 (self.group_uuid,
11900
                                  utils.CommaJoin(self.req_target_uuids)),
11901
                                 errors.ECODE_INVAL)
11902

    
11903
    if not self.op.iallocator:
11904
      # Use default iallocator
11905
      self.op.iallocator = self.cfg.GetDefaultIAllocator()
11906

    
11907
    if not self.op.iallocator:
11908
      raise errors.OpPrereqError("No iallocator was specified, neither in the"
11909
                                 " opcode nor as a cluster-wide default",
11910
                                 errors.ECODE_INVAL)
11911

    
11912
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11913
    self.needed_locks = {
11914
      locking.LEVEL_INSTANCE: [],
11915
      locking.LEVEL_NODEGROUP: [],
11916
      locking.LEVEL_NODE: [],
11917
      }
11918

    
11919
  def DeclareLocks(self, level):
11920
    if level == locking.LEVEL_INSTANCE:
11921
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
11922

    
11923
      # Lock instances optimistically, needs verification once node and group
11924
      # locks have been acquired
11925
      self.needed_locks[locking.LEVEL_INSTANCE] = \
11926
        self.cfg.GetNodeGroupInstances(self.group_uuid)
11927

    
11928
    elif level == locking.LEVEL_NODEGROUP:
11929
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11930

    
11931
      if self.req_target_uuids:
11932
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
11933

    
11934
        # Lock all groups used by instances optimistically; this requires going
11935
        # via the node before it's locked, requiring verification later on
11936
        lock_groups.update(group_uuid
11937
                           for instance_name in
11938
                             self.glm.list_owned(locking.LEVEL_INSTANCE)
11939
                           for group_uuid in
11940
                             self.cfg.GetInstanceNodeGroups(instance_name))
11941
      else:
11942
        # No target groups, need to lock all of them
11943
        lock_groups = locking.ALL_SET
11944

    
11945
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11946

    
11947
    elif level == locking.LEVEL_NODE:
11948
      # This will only lock the nodes in the group to be evacuated which
11949
      # contain actual instances
11950
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11951
      self._LockInstancesNodes()
11952

    
11953
      # Lock all nodes in group to be evacuated
11954
      assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
11955
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
11956
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11957

    
11958
  def CheckPrereq(self):
11959
    owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
11960
    owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
11961
    owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
11962

    
11963
    assert owned_groups.issuperset(self.req_target_uuids)
11964
    assert self.group_uuid in owned_groups
11965

    
11966
    # Check if locked instances are still correct
11967
    wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
11968
    if owned_instances != wanted_instances:
11969
      raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
11970
                                 " changed since locks were acquired, wanted"
11971
                                 " %s, have %s; retry the operation" %
11972
                                 (self.group_uuid,
11973
                                  utils.CommaJoin(wanted_instances),
11974
                                  utils.CommaJoin(owned_instances)),
11975
                                 errors.ECODE_STATE)
11976

    
11977
    # Get instance information
11978
    self.instances = dict((name, self.cfg.GetInstanceInfo(name))
11979
                          for name in owned_instances)
11980

    
11981
    # Check if node groups for locked instances are still correct
11982
    for instance_name in owned_instances:
11983
      inst = self.instances[instance_name]
11984
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
11985
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
11986
      assert owned_nodes.issuperset(inst.all_nodes), \
11987
        "Instance %s's nodes changed while we kept the lock" % instance_name
11988

    
11989
      inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
11990
      if not owned_groups.issuperset(inst_groups):
11991
        raise errors.OpPrereqError("Instance's node groups changed since locks"
11992
                                   " were acquired, current groups are '%s',"
11993
                                   " owning groups '%s'; retry the operation" %
11994
                                   (utils.CommaJoin(inst_groups),
11995
                                    utils.CommaJoin(owned_groups)),
11996
                                   errors.ECODE_STATE)
11997

    
11998
    if self.req_target_uuids:
11999
      # User requested specific target groups
12000
      self.target_uuids = self.req_target_uuids
12001
    else:
12002
      # All groups except the one to be evacuated are potential targets
12003
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12004
                           if group_uuid != self.group_uuid]
12005

    
12006
      if not self.target_uuids:
12007
        raise errors.OpExecError("There are no possible target groups")
12008

    
12009
  def BuildHooksEnv(self):
12010
    """Build hooks env.
12011

12012
    """
12013
    return {
12014
      "GROUP_NAME": self.op.group_name,
12015
      "TARGET_GROUPS": " ".join(self.target_uuids),
12016
      }
12017

    
12018
  def BuildHooksNodes(self):
12019
    """Build hooks nodes.
12020

12021
    """
12022
    mn = self.cfg.GetMasterNode()
12023

    
12024
    assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12025

    
12026
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12027

    
12028
    return (run_nodes, run_nodes)
12029

    
12030
  def Exec(self, feedback_fn):
12031
    instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))
12032

    
12033
    assert self.group_uuid not in self.target_uuids
12034

    
12035
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12036
                     instances=instances, target_groups=self.target_uuids)
12037

    
12038
    ial.Run(self.op.iallocator)
12039

    
12040
    if not ial.success:
12041
      raise errors.OpPrereqError("Can't compute group evacuation using"
12042
                                 " iallocator '%s': %s" %
12043
                                 (self.op.iallocator, ial.info),
12044
                                 errors.ECODE_NORES)
12045

    
12046
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12047

    
12048
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12049
                 len(jobs), self.op.group_name)
12050

    
12051
    return ResultWithJobs(jobs)
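
# Illustrative sketch of the value returned above: the iallocator result is
# turned into a list of jobs, each job being an ordered list of opcodes, and
# the whole thing is wrapped in ResultWithJobs so the jobs get submitted.  The
# structure below uses placeholder dictionaries instead of real opcode
# objects, purely to show the nesting.
def _ExampleEvacuationJobs():
  return [
    # job 1: a single opcode moving the first instance
    [{"OP_ID": "OP_INSTANCE_MIGRATE", "instance_name": "inst1"}],
    # job 2: two opcodes executed in order for the second instance
    [{"OP_ID": "OP_INSTANCE_REPLACE_DISKS", "instance_name": "inst2"},
     {"OP_ID": "OP_INSTANCE_FAILOVER", "instance_name": "inst2"}],
    ]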
12052

    
12053

    
12054
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12055
  """Generic tags LU.
12056

12057
  This is an abstract class which is the parent of all the other tags LUs.
12058

12059
  """
12060
  def ExpandNames(self):
12061
    self.group_uuid = None
12062
    self.needed_locks = {}
12063
    if self.op.kind == constants.TAG_NODE:
12064
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12065
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12066
    elif self.op.kind == constants.TAG_INSTANCE:
12067
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12068
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12069
    elif self.op.kind == constants.TAG_NODEGROUP:
12070
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12071

    
12072
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12073
    # not possible to acquire the BGL based on opcode parameters)
12074

    
12075
  def CheckPrereq(self):
12076
    """Check prerequisites.
12077

12078
    """
12079
    if self.op.kind == constants.TAG_CLUSTER:
12080
      self.target = self.cfg.GetClusterInfo()
12081
    elif self.op.kind == constants.TAG_NODE:
12082
      self.target = self.cfg.GetNodeInfo(self.op.name)
12083
    elif self.op.kind == constants.TAG_INSTANCE:
12084
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12085
    elif self.op.kind == constants.TAG_NODEGROUP:
12086
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12087
    else:
12088
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12089
                                 str(self.op.kind), errors.ECODE_INVAL)
12090

    
12091

    
12092
class LUTagsGet(TagsLU):
12093
  """Returns the tags of a given object.
12094

12095
  """
12096
  REQ_BGL = False
12097

    
12098
  def ExpandNames(self):
12099
    TagsLU.ExpandNames(self)
12100

    
12101
    # Share locks as this is only a read operation
12102
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
12103

    
12104
  def Exec(self, feedback_fn):
12105
    """Returns the tag list.
12106

12107
    """
12108
    return list(self.target.GetTags())
12109

    
12110

    
12111
class LUTagsSearch(NoHooksLU):
12112
  """Searches the tags for a given pattern.
12113

12114
  """
12115
  REQ_BGL = False
12116

    
12117
  def ExpandNames(self):
12118
    self.needed_locks = {}
12119

    
12120
  def CheckPrereq(self):
12121
    """Check prerequisites.
12122

12123
    This checks the pattern passed for validity by compiling it.
12124

12125
    """
12126
    try:
12127
      self.re = re.compile(self.op.pattern)
12128
    except re.error, err:
12129
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12130
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12131

    
12132
  def Exec(self, feedback_fn):
12133
    """Returns the tag list.
12134

12135
    """
12136
    cfg = self.cfg
12137
    tgts = [("/cluster", cfg.GetClusterInfo())]
12138
    ilist = cfg.GetAllInstancesInfo().values()
12139
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12140
    nlist = cfg.GetAllNodesInfo().values()
12141
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12142
    tgts.extend(("/nodegroup/%s" % n.name, n)
12143
                for n in cfg.GetAllNodeGroupsInfo().values())
12144
    results = []
12145
    for path, target in tgts:
12146
      for tag in target.GetTags():
12147
        if self.re.search(tag):
12148
          results.append((path, tag))
12149
    return results
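
# Self-contained sketch (made-up data) of the search above: every (path, tag)
# pair whose tag matches the compiled pattern is reported.  The "re" module is
# already imported at the top of this file; it is aliased here only to keep
# the sketch standalone.
import re as _example_re

def _ExampleTagSearch(pattern, tagged_objects):
  """Returns (path, tag) pairs whose tag matches the given regex."""
  rx = _example_re.compile(pattern)
  return [(path, tag)
          for (path, tags) in tagged_objects
          for tag in tags
          if rx.search(tag)]

# _ExampleTagSearch("^db", [("/instances/inst1", ["dbserver", "web"])])
# returns [("/instances/inst1", "dbserver")]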
12150

    
12151

    
12152
class LUTagsSet(TagsLU):
12153
  """Sets a tag on a given object.
12154

12155
  """
12156
  REQ_BGL = False
12157

    
12158
  def CheckPrereq(self):
12159
    """Check prerequisites.
12160

12161
    This checks the type and length of the tag name and value.
12162

12163
    """
12164
    TagsLU.CheckPrereq(self)
12165
    for tag in self.op.tags:
12166
      objects.TaggableObject.ValidateTag(tag)
12167

    
12168
  def Exec(self, feedback_fn):
12169
    """Sets the tag.
12170

12171
    """
12172
    try:
12173
      for tag in self.op.tags:
12174
        self.target.AddTag(tag)
12175
    except errors.TagError, err:
12176
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12177
    self.cfg.Update(self.target, feedback_fn)
12178

    
12179

    
12180
class LUTagsDel(TagsLU):
12181
  """Delete a list of tags from a given object.
12182

12183
  """
12184
  REQ_BGL = False
12185

    
12186
  def CheckPrereq(self):
12187
    """Check prerequisites.
12188

12189
    This checks that we have the given tag.
12190

12191
    """
12192
    TagsLU.CheckPrereq(self)
12193
    for tag in self.op.tags:
12194
      objects.TaggableObject.ValidateTag(tag)
12195
    del_tags = frozenset(self.op.tags)
12196
    cur_tags = self.target.GetTags()
12197

    
12198
    diff_tags = del_tags - cur_tags
12199
    if diff_tags:
12200
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12201
      raise errors.OpPrereqError("Tag(s) %s not found" %
12202
                                 (utils.CommaJoin(diff_names), ),
12203
                                 errors.ECODE_NOENT)
12204

    
12205
  def Exec(self, feedback_fn):
12206
    """Remove the tag from the object.
12207

12208
    """
12209
    for tag in self.op.tags:
12210
      self.target.RemoveTag(tag)
12211
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
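
  # The client-side counterpart of _NotifyUsingSocket lives outside this
  # module; a minimal sketch (names are placeholders): after picking up the
  # socket path from the notification, the client must connect within
  # _CLIENT_CONNECT_TIMEOUT and later send a single byte to confirm:
  #   csock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   csock.connect(sockname)
  #   ...  # act on the notification
  #   csock.send("x")
  #   csock.close()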

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (the key/value pairs required by the selected mode, as
      listed in the _MODE_DATA class attribute)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
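
  # Illustrative usage sketch (not exercised in this module): a logical unit
  # builds a request for one of the modes registered in _MODE_DATA and runs
  # the external script, roughly:
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance_name, relocate_from=[old_secondary])
  #   ial.Run(allocator_name)
  #   if not ial.success:
  #     raise errors.OpPrereqError(...)
  # (instance_name, old_secondary and allocator_name are placeholders); the
  # keyword arguments must match the keydata registered for the chosen mode.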

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
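
    # The resulting dict maps each group UUID to its exported attributes,
    # e.g. (values illustrative):
    #   {"<uuid>": {"name": "default", "alloc_policy": ...}}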

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute static (configuration-derived) node data.

    @rtype: dict
    @returns: a dict mapping node names to dicts of config-based node
      attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Extend the static node data with dynamic (runtime) information.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
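
  # Each entry in _MODE_DATA below maps an allocator mode to a tuple of
  # (request-building method, list of (key, validator) pairs that must be
  # supplied as keyword arguments to __init__, validator for the "result"
  # field returned by the external script).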

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
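
# For example, _GetQueryImplementation(constants.QR_INSTANCE) returns the
# _InstanceQuery class, while an unknown resource name raises OpPrereqError
# with ECODE_INVAL.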