root / lib / cmdlib.py @ a20e4768

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import runtime

import ganeti.masterd.instance # pylint: disable=W0611


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
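
# Illustrative sketch (not part of the original module): an Exec method that
# hands follow-up jobs back to the processor via ResultWithJobs. The opcode
# and its parameters below are hypothetical examples.
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpTestDelay(duration=1.0)]]
#     return ResultWithJobs(jobs, done=True)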


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
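
# Illustrative sketch (not part of the original module): the minimal shape of
# a concrete LU that can run without the Big Ganeti Lock. The class name and
# the opcode field "instance_name" used here are hypothetical.
#
#   class LUExampleNoop(NoHooksLU):
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.instance.name)
#       return True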


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
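
# Illustrative sketch (not part of the original module): expected behaviour of
# _GetUpdatedParams, assuming constants.VALUE_DEFAULT is the "reset" marker.
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#   -> {"b": 2, "c": 3}
#
#   _GetUpdatedParams({"a": 1}, {"a": None}, use_none=True)
#   -> {}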


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
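
# Illustrative sketch (not part of the original module): for an instance with
# one bridged NIC and one disk, the environment built above contains entries
# such as (values are invented examples):
#
#   INSTANCE_NAME=inst1.example.com     INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1                INSTANCE_NIC0_MODE=bridged
#   INSTANCE_DISK_COUNT=1               INSTANCE_DISK0_SIZE=10240
#
# The hooks runner later prefixes every key with "GANETI_".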


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
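
# Illustrative sketch (not part of the original module): with the opcode's
# error_codes option enabled, _Error above emits machine-parseable lines such
# as
#   ERROR:ENODENET:node:node1.example.com:missing bridge
# and, without it, human-oriented lines such as
#   ERROR: node node1.example.com: missing bridge
# (both are reported through feedback_fn, prefixed with "  - "). The node name
# and message are invented examples.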


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
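
  # Illustrative note (not part of the original source): for a three-group
  # cluster the job list built above looks roughly like
  #   [[OpClusterVerifyConfig()],
  #    [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
  #    [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])],
  #    [OpClusterVerifyGroup(group_name="group3", depends=[(-3, [])])]]
  # i.e. every per-group job declares a relative dependency pointing back at
  # the global configuration check submitted first (group names hypothetical).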


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
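
  # Illustrative note (not part of the original source): the node's reported
  # time is only flagged when it falls outside the RPC window widened by
  # constants.NODE_MAX_CLOCK_SKEW on both sides. Assuming a skew allowance of
  # 150s and an RPC window of [1000.0, 1002.0], a node clock of 1100.0 passes,
  # while 1200.0 is reported as diverging by at least
  # abs(1200.0 - 1002.0) = 198.0s (all values hypothetical).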

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
          self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
        # we're skipping nodes marked offline and nodes in other groups from
        # the N+1 warning, since most likely we don't have good memory
        # information from them; we already list instances living on such
        # nodes, and that's enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
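
  # Worked example (illustrative, not from the original source): if node A is
  # secondary for instances i1 (2048 MiB) and i2 (1024 MiB) whose primary is
  # node B, and both instances have auto_balance enabled, then failing node B
  # requires needed_mem = 3072 MiB on A; the N+1 error above fires as soon as
  # A reports mfree < 3072.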

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        # Non-optional files
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                cls.ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
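
  # Illustrative note (not part of the original source): for a file present in
  # two versions, fileinfo ends up shaped roughly like
  #   {"/var/lib/ganeti/config.data": {"abc123def456...": set(["node1",
  #                                                            "node2"]),
  #                                    "def456abc123...": set(["node3"])}}
  # which the loop above reports as "found with 2 different checksums
  # (variant 1 on node1, node2; variant 2 on node3)"; path, checksums and node
  # names here are hypothetical examples.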

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
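
  # Illustrative note (not part of the original source): after this call
  # nimg.oslist is keyed by OS name, each value being a list of
  # (path, status, diagnose, variants, parameters, api_versions) tuples, e.g.
  #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
  #                     set(["default"]), set(), set([20]))]}
  # (the OS name, path and API version shown are hypothetical examples).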

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

    
2470
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2471
    """Gets per-disk status information for all instances.
2472

2473
    @type nodelist: list of strings
2474
    @param nodelist: Node names
2475
    @type node_image: dict of (name, L{objects.Node})
2476
    @param node_image: Node objects
2477
    @type instanceinfo: dict of (name, L{objects.Instance})
2478
    @param instanceinfo: Instance objects
2479
    @rtype: {instance: {node: [(succes, payload)]}}
2480
    @return: a dictionary of per-instance dictionaries with nodes as
2481
        keys and disk information as values; the disk information is a
2482
        list of tuples (success, payload)
2483

2484
    """
2485
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2486

    
2487
    node_disks = {}
2488
    node_disks_devonly = {}
2489
    diskless_instances = set()
2490
    diskless = constants.DT_DISKLESS
2491

    
2492
    for nname in nodelist:
2493
      node_instances = list(itertools.chain(node_image[nname].pinst,
2494
                                            node_image[nname].sinst))
2495
      diskless_instances.update(inst for inst in node_instances
2496
                                if instanceinfo[inst].disk_template == diskless)
2497
      disks = [(inst, disk)
2498
               for inst in node_instances
2499
               for disk in instanceinfo[inst].disks]
2500

    
2501
      if not disks:
2502
        # No need to collect data
2503
        continue
2504

    
2505
      node_disks[nname] = disks
2506

    
2507
      # Creating copies as SetDiskID below will modify the objects and that can
2508
      # lead to incorrect data returned from nodes
2509
      devonly = [dev.Copy() for (_, dev) in disks]
2510

    
2511
      for dev in devonly:
2512
        self.cfg.SetDiskID(dev, nname)
2513

    
2514
      node_disks_devonly[nname] = devonly
2515

    
2516
    assert len(node_disks) == len(node_disks_devonly)
2517

    
2518
    # Collect data from all nodes with disks
2519
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2520
                                                          node_disks_devonly)
2521

    
2522
    assert len(result) == len(node_disks)
2523

    
2524
    instdisk = {}
2525

    
2526
    for (nname, nres) in result.items():
2527
      disks = node_disks[nname]
2528

    
2529
      if nres.offline:
2530
        # No data from this node
2531
        data = len(disks) * [(False, "node offline")]
2532
      else:
2533
        msg = nres.fail_msg
2534
        _ErrorIf(msg, self.ENODERPC, nname,
2535
                 "while getting disk information: %s", msg)
2536
        if msg:
2537
          # No data from this node
2538
          data = len(disks) * [(False, msg)]
2539
        else:
2540
          data = []
2541
          for idx, i in enumerate(nres.payload):
2542
            if isinstance(i, (tuple, list)) and len(i) == 2:
2543
              data.append(i)
2544
            else:
2545
              logging.warning("Invalid result from node %s, entry %d: %s",
2546
                              nname, idx, i)
2547
              data.append((False, "Invalid result from the remote node"))
2548

    
2549
      for ((inst, _), status) in zip(disks, data):
2550
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2551

    
2552
    # Add empty entries for diskless instances.
2553
    for inst in diskless_instances:
2554
      assert inst not in instdisk
2555
      instdisk[inst] = {}
2556

    
2557
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2558
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2559
                      compat.all(isinstance(s, (tuple, list)) and
2560
                                 len(s) == 2 for s in statuses)
2561
                      for inst, nnames in instdisk.items()
2562
                      for nname, statuses in nnames.items())
2563
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2564

    
2565
    return instdisk
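
  # Illustrative note (not part of the original source): for a mirrored
  # instance with one disk, the returned structure looks roughly like
  #   {"instance1.example.com": {"node1": [(True, <blockdev status>)],
  #                              "node2": [(True, <blockdev status>)]}}
  # with one (success, payload) tuple per disk on every node the disk lives on
  # (instance and node names are hypothetical).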

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
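
  # Illustrative note (not part of the original source): the first element
  # lists the group's online nodes (which contact each other), while the dict
  # gives every node one peer drawn round-robin from each other group, e.g.
  #   (["node1", "node2"],
  #    {"node1": ["groupB-node1", "groupC-node1"],
  #     "node2": ["groupB-node2", "groupC-node2"]})
  # for a three-group cluster (node names are hypothetical).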

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
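
  # Illustrative note (not part of the original source): the resulting
  # environment looks like
  #   {"CLUSTER_TAGS": "tag1 tag2",
  #    "NODE_TAGS_node1.example.com": "nodetag1 nodetag2", ...}
  # i.e. one space-joined tag list for the cluster plus one entry per verified
  # node (tag and node names are hypothetical).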

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }
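
    # Illustrative note (not part of the original source): each NV_* key in
    # this dict asks the node, via the node_verify RPC below, to run one
    # specific check and report back under the same key; e.g. NV_TIME returns
    # the node's clock and NV_FILELIST returns a {filename: checksum} map
    # that later feeds _VerifyFiles.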

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)
2872

    
2873
    for node, result in extra_lv_nvinfo.items():
2874
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2875
                              node_image[node], vg_name)
2876

    
2877
    feedback_fn("* Verifying instance status")
2878
    for instance in self.my_inst_names:
2879
      if verbose:
2880
        feedback_fn("* Verifying instance %s" % instance)
2881
      inst_config = self.my_inst_info[instance]
2882
      self._VerifyInstance(instance, inst_config, node_image,
2883
                           instdisk[instance])
2884
      inst_nodes_offline = []
2885

    
2886
      pnode = inst_config.primary_node
2887
      pnode_img = node_image[pnode]
2888
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2889
               self.ENODERPC, pnode, "instance %s, connection to"
2890
               " primary node failed", instance)
2891

    
2892
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2893
               self.EINSTANCEBADNODE, instance,
2894
               "instance is marked as running and lives on offline node %s",
2895
               inst_config.primary_node)
2896

    
2897
      # If the instance is non-redundant we cannot survive losing its primary
2898
      # node, so we are not N+1 compliant. On the other hand we have no disk
2899
      # templates with more than one secondary so that situation is not well
2900
      # supported either.
2901
      # FIXME: does not support file-backed instances
2902
      if not inst_config.secondary_nodes:
2903
        i_non_redundant.append(instance)
2904

    
2905
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2906
               instance, "instance has multiple secondary nodes: %s",
2907
               utils.CommaJoin(inst_config.secondary_nodes),
2908
               code=self.ETYPE_WARNING)
2909

    
2910
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2911
        pnode = inst_config.primary_node
2912
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2913
        instance_groups = {}
2914

    
2915
        for node in instance_nodes:
2916
          instance_groups.setdefault(self.all_node_info[node].group,
2917
                                     []).append(node)
2918

    
2919
        pretty_list = [
2920
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2921
          # Sort so that we always list the primary node first.
2922
          for group, nodes in sorted(instance_groups.items(),
2923
                                     key=lambda (_, nodes): pnode in nodes,
2924
                                     reverse=True)]
2925

    
2926
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2927
                      instance, "instance has primary and secondary nodes in"
2928
                      " different groups: %s", utils.CommaJoin(pretty_list),
2929
                      code=self.ETYPE_WARNING)
2930

    
2931
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2932
        i_non_a_balanced.append(instance)
2933

    
2934
      for snode in inst_config.secondary_nodes:
2935
        s_img = node_image[snode]
2936
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2937
                 "instance %s, connection to secondary node failed", instance)
2938

    
2939
        if s_img.offline:
2940
          inst_nodes_offline.append(snode)
2941

    
2942
      # warn that the instance lives on offline nodes
2943
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2944
               "instance has offline secondary node(s) %s",
2945
               utils.CommaJoin(inst_nodes_offline))
2946
      # ... or ghost/non-vm_capable nodes
2947
      for node in inst_config.all_nodes:
2948
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2949
                 "instance lives on ghost node %s", node)
2950
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2951
                 instance, "instance lives on non-vm_capable node %s", node)
2952

    
2953
    feedback_fn("* Verifying orphan volumes")
2954
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2955

    
2956
    # We will get spurious "unknown volume" warnings if any node of this group
2957
    # is secondary for an instance whose primary is in another group. To avoid
2958
    # them, we find these instances and add their volumes to node_vol_should.
2959
    for inst in self.all_inst_info.values():
2960
      for secondary in inst.secondary_nodes:
2961
        if (secondary in self.my_node_info
2962
            and inst.name not in self.my_inst_info):
2963
          inst.MapLVsByNode(node_vol_should)
2964
          break
2965

    
2966
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2967

    
2968
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2969
      feedback_fn("* Verifying N+1 Memory redundancy")
2970
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2971

    
2972
    feedback_fn("* Other Notes")
2973
    if i_non_redundant:
2974
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2975
                  % len(i_non_redundant))
2976

    
2977
    if i_non_a_balanced:
2978
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2979
                  % len(i_non_a_balanced))
2980

    
2981
    if n_offline:
2982
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2983

    
2984
    if n_drained:
2985
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2986

    
2987
    return not self.bad
2988

    
2989
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2990
    """Analyze the post-hooks' result
2991

2992
    This method analyses the hook result, handles it, and sends some
2993
    nicely-formatted feedback back to the user.
2994

2995
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2996
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2997
    @param hooks_results: the results of the multi-node hooks rpc call
2998
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

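        # only the online flag of each LV matters here; the first two fields
        # of the payload tuples are not used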
        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
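        # the size reported by blockdev_getsize appears to be in bytes,
        # while disk.size is recorded in MiB, hence the conversion below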
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_activate_master_ip(master)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

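    # helper applying DDM_ADD/DDM_REMOVE modifications to one of the
    # per-cluster OS lists (hidden/blacklisted), used just below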
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_activate_master_ip(master)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_activate_master_ip(master)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

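    # wait for roughly the longest estimated sync time reported above, but
    # poll again at least once a minute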
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

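  # the device is only considered consistent if all of its children are
  # consistent as well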
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

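          # when powering on several nodes, optionally pause for
          # power_delay seconds between nodes (but not after the last one)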
          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
4265
        info.valid = bool(info.valid and osl and osl[0][1])
4266
        if not info.valid:
4267
          break
4268

    
4269
        (node_variants, node_params, node_api) = osl[0][3:6]
4270
        if idx == 0:
4271
          # First entry
4272
          variants.update(node_variants)
4273
          parameters.update(node_params)
4274
          api_versions.update(node_api)
4275
        else:
4276
          # Filter out inconsistent values
4277
          variants.intersection_update(node_variants)
4278
          parameters.intersection_update(node_params)
4279
          api_versions.intersection_update(node_api)
4280

    
4281
      info.variants = list(variants)
4282
      info.parameters = list(parameters)
4283
      info.api_versions = list(api_versions)
4284

    
4285
      data[os_name] = info
4286

    
4287
    # Prepare data in requested order
4288
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4289
            if name in data]
4290

    
4291

    
4292
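# Editor's illustrative sketch (hypothetical helper, not used by any LU in
# this module): _OsQuery._GetQueryData above keeps only the OS variants,
# parameters and API versions that every node reports, by seeding a set from
# the first node and intersecting it with each following node.  The same idea
# reduced to plain Python sets:
def _ExampleConsistentValues(per_node_values):
  """Return the values reported consistently by all nodes, sorted.

  @param per_node_values: sequence of iterables, one per node

  """
  result = None
  for values in per_node_values:
    if result is None:
      # the first node seeds the candidate set
      result = set(values)
    else:
      # later nodes can only narrow it down
      result.intersection_update(values)
  return sorted(result) if result else []

# For instance, _ExampleConsistentValues([["default", "minimal"], ["default"]])
# returns ["default"].

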
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


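# Editor's note on _BuildFilter above (illustrative restatement): with the
# default output fields, i.e. neither "hidden", "blacklisted" nor "valid"
# requested, the status filter collapses to "not hidden and not blacklisted
# and valid", which keeps the legacy behaviour of hiding such OSes from the
# listing; requesting one of those fields drops the corresponding condition,
# and the name filter (if any) is AND-ed on top of the result.

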
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


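# Editor's note (summary of the Exec flow above, no new behaviour implied):
# node removal first refills the master-candidate pool without the departing
# node, then drops the node from the configuration and context, runs the post
# hooks while the node is still reachable, asks the node daemon to leave the
# cluster, and finally cleans up /etc/hosts and redistributes the ancillary
# files when modify_etc_hosts is enabled.

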
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


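# Editor's illustrative sketch (hypothetical helper, not part of the module):
# the NQ_INST branch above inverts the instance->nodes relation into two
# node->instances maps.  The same pattern with plain dictionaries, where each
# instance is described by a (name, primary_node, secondary_nodes) tuple:
def _ExampleNodeToInstanceMaps(node_names, instances):
  """Build node-to-primary and node-to-secondary instance name maps.

  @param node_names: iterable of node names to index
  @param instances: iterable of (name, primary_node, secondary_nodes) tuples

  """
  node_to_primary = dict((name, set()) for name in node_names)
  node_to_secondary = dict((name, set()) for name in node_names)
  for (inst_name, primary, secondaries) in instances:
    if primary in node_to_primary:
      node_to_primary[primary].add(inst_name)
    for secnode in secondaries:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst_name)
  return (node_to_primary, node_to_secondary)

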
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


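# Editor's note (illustrative): the "instance" column above relies on the
# vol2inst map being keyed by (node_name, "vg/lv_name") pairs, which is why
# the lookup key is built as vol["vg"] + "/" + vol["name"]; any volume not
# found in that map is reported as "-".

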
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


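# Editor's illustrative sketch (hypothetical helper, not used above): the Exec
# method of LUNodeQueryStorage always asks the backend for the name column,
# strips the fields only the LU itself can answer, and then maps the requested
# output fields onto row indices.  The same projection reduced to plain Python:
def _ExampleProjectRows(requested_fields, backend_fields, rows, extra_values):
  """Project backend rows onto the requested output fields.

  @param requested_fields: fields the caller asked for
  @param backend_fields: fields actually present in each backend row
  @param rows: list of value lists, aligned with backend_fields
  @param extra_values: dict of values only the caller knows (e.g. node name)

  """
  field_idx = dict((name, idx) for (idx, name) in enumerate(backend_fields))
  output = []
  for row in rows:
    out = []
    for field in requested_fields:
      if field in extra_values:
        out.append(extra_values[field])
      elif field in field_idx:
        out.append(row[field_idx[field]])
      else:
        raise ValueError("Unknown field %r" % field)
    output.append(out)
  return output

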
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


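# Editor's note (summary of the classification above, illustrative only): when
# live data is requested, every node that fails the RPC ends up in bad_nodes
# (offline ones additionally in offline_nodes), instances reported by a node
# other than their configured primary are collected in wrongnode_inst, and
# only instances reported by their primary node contribute to live_data;
# orphan instances are merely logged.

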
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on a node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


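# Editor's note (illustrative): CheckArguments of LUNodeModifyStorage above
# validates the request purely with set arithmetic: the storage type must have
# an entry in constants.MODIFIABLE_STORAGE_FIELDS, and set(changes) minus that
# whitelist must be empty, so any field outside the per-type whitelist is
# rejected before the RPC is ever issued.

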
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


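# Editor's illustrative sketch (hypothetical helper, not used by LUNodeAdd):
# the prerequisite check above insists that a new node has the same "homing"
# as the master: either both use a single IP (primary == secondary) or both
# have a distinct secondary IP.  The core of that rule, in isolation:
def _ExampleSameHoming(master_primary, master_secondary,
                       node_primary, node_secondary):
  """Return True if the node's single/dual-homed setup matches the master."""
  master_singlehomed = master_secondary == master_primary
  node_singlehomed = node_secondary == node_primary
  return master_singlehomed == node_singlehomed

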
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


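# Editor's illustrative sketch (hypothetical helper, not used by the LU): the
# _F2R/_R2F tables above encode a node role as the single flag that is set,
# with (False, False, False) meaning a regular node.  The same mapping,
# spelled out without the class constants:
def _ExampleNodeRole(master_candidate, drained, offline):
  """Return a symbolic role name for a (mc, drained, offline) flag tuple."""
  roles = {
    (True, False, False): "candidate",
    (False, True, False): "drained",
    (False, False, True): "offline",
    (False, False, False): "regular",
    }
  return roles[(master_candidate, drained, offline)]

# _ExampleNodeRole(False, False, True) == "offline"; any other combination,
# e.g. two flags set at once, is rejected with a KeyError, mirroring the
# assert on _F2R membership in CheckPrereq.

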
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


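# Editor's illustrative sketch (hypothetical helper, not used above): the
# os_hvp loop in LUClusterQuery.Exec keeps only the per-OS hypervisor
# parameter overrides whose hypervisor is currently enabled.  The same idea
# as a generic nested-dict filter:
def _ExampleFilterEnabled(os_hvp, enabled_hypervisors):
  """Drop hypervisor overrides for hypervisors that are not enabled."""
  enabled = frozenset(enabled_hypervisors)
  return dict((os_name,
               dict((hv, params) for (hv, params) in hv_dict.items()
                    if hv in enabled))
              for (os_name, hv_dict) in os_hvp.items())

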
class LUClusterConfigQuery(NoHooksLU):
5559
  """Return configuration values.
5560

5561
  """
5562
  REQ_BGL = False
5563
  _FIELDS_DYNAMIC = utils.FieldSet()
5564
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5565
                                  "watcher_pause", "volume_group_name")
5566

    
5567
  def CheckArguments(self):
5568
    _CheckOutputFields(static=self._FIELDS_STATIC,
5569
                       dynamic=self._FIELDS_DYNAMIC,
5570
                       selected=self.op.output_fields)
5571

    
5572
  def ExpandNames(self):
5573
    self.needed_locks = {}
5574

    
5575
  def Exec(self, feedback_fn):
5576
    """Dump a representation of the cluster config to the standard output.
5577

5578
    """
5579
    values = []
5580
    for field in self.op.output_fields:
5581
      if field == "cluster_name":
5582
        entry = self.cfg.GetClusterName()
5583
      elif field == "master_node":
5584
        entry = self.cfg.GetMasterNode()
5585
      elif field == "drain_flag":
5586
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5587
      elif field == "watcher_pause":
5588
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5589
      elif field == "volume_group_name":
5590
        entry = self.cfg.GetVGName()
5591
      else:
5592
        raise errors.ParameterError(field)
5593
      values.append(entry)
5594
    return values
5595

    
5596

    
5597
class LUInstanceActivateDisks(NoHooksLU):
5598
  """Bring up an instance's disks.
5599

5600
  """
5601
  REQ_BGL = False
5602

    
5603
  def ExpandNames(self):
5604
    self._ExpandAndLockInstance()
5605
    self.needed_locks[locking.LEVEL_NODE] = []
5606
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5607

    
5608
  def DeclareLocks(self, level):
5609
    if level == locking.LEVEL_NODE:
5610
      self._LockInstancesNodes()
5611

    
5612
  def CheckPrereq(self):
5613
    """Check prerequisites.
5614

5615
    This checks that the instance is in the cluster.
5616

5617
    """
5618
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5619
    assert self.instance is not None, \
5620
      "Cannot retrieve locked instance %s" % self.op.instance_name
5621
    _CheckNodeOnline(self, self.instance.primary_node)
5622

    
5623
  def Exec(self, feedback_fn):
5624
    """Activate the disks.
5625

5626
    """
5627
    disks_ok, disks_info = \
5628
              _AssembleInstanceDisks(self, self.instance,
5629
                                     ignore_size=self.op.ignore_size)
5630
    if not disks_ok:
5631
      raise errors.OpExecError("Cannot activate block devices")
5632

    
5633
    return disks_info
5634

    
5635

    
5636
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5637
                           ignore_size=False):
5638
  """Prepare the block devices for an instance.
5639

5640
  This sets up the block devices on all nodes.
5641

5642
  @type lu: L{LogicalUnit}
5643
  @param lu: the logical unit on whose behalf we execute
5644
  @type instance: L{objects.Instance}
5645
  @param instance: the instance for whose disks we assemble
5646
  @type disks: list of L{objects.Disk} or None
5647
  @param disks: which disks to assemble (or all, if None)
5648
  @type ignore_secondaries: boolean
5649
  @param ignore_secondaries: if true, errors on secondary nodes
5650
      won't result in an error return from the function
5651
  @type ignore_size: boolean
5652
  @param ignore_size: if true, the current known size of the disk
5653
      will not be used during the disk activation, useful for cases
5654
      when the size is wrong
5655
  @return: False if the operation failed, otherwise a list of
5656
      (host, instance_visible_name, node_visible_name)
5657
      with the mapping from node devices to instance devices
5658

5659
  """
5660
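  # Typical calling pattern (see e.g. LUInstanceActivateDisks.Exec above):
  #   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
  #   if not disks_ok:
  #     raise errors.OpExecError(...)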
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
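  # Note: the result is True only if every shutdown call succeeded; failures
  # on offline secondary nodes (and, with ignore_primary set, on the primary
  # node) are tolerated, as per the checks below.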
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
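  # Example call (as used by LUInstanceStartup.CheckPrereq below):
  #   _CheckNodeFreeMemory(self, instance.primary_node,
  #                        "starting instance %s" % instance.name,
  #                        bep[constants.BE_MEMORY], instance.hypervisor)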
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the given VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
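  # Illustrative call (node and VG names are made-up values):
  #   _CheckNodesFreeDiskPerVG(lu, ["node1", "node2"], {"xenvg": 10 * 1024})
  # would require at least 10 GiB of free space in volume group "xenvg" on
  # both nodes.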
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

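    # Soft and hard reboots of a running instance are delegated to the
    # hypervisor on the primary node; any other case is handled as a full
    # reboot: shutdown (if running), disk re-activation and start.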
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
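          # a DRBD8 logical_id is a 6-tuple:
          #   (node_a, node_b, port, minor_a, minor_b, secret)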
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6570
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6571
    return env
6572

    
6573
  def BuildHooksNodes(self):
6574
    """Build hooks nodes.
6575

6576
    """
6577
    nl = [self.cfg.GetMasterNode()]
6578
    nl_post = list(self.instance.all_nodes) + nl
6579
    return (nl, nl_post)
6580

    
6581
  def CheckPrereq(self):
6582
    """Check prerequisites.
6583

6584
    This checks that the instance is in the cluster.
6585

6586
    """
6587
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6588
    assert self.instance is not None, \
6589
      "Cannot retrieve locked instance %s" % self.op.instance_name
6590

    
6591
  def Exec(self, feedback_fn):
6592
    """Remove the instance.
6593

6594
    """
6595
    instance = self.instance
6596
    logging.info("Shutting down instance %s on node %s",
6597
                 instance.name, instance.primary_node)
6598

    
6599
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6600
                                             self.op.shutdown_timeout)
6601
    msg = result.fail_msg
6602
    if msg:
6603
      if self.op.ignore_failures:
6604
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6605
      else:
6606
        raise errors.OpExecError("Could not shutdown instance %s on"
6607
                                 " node %s: %s" %
6608
                                 (instance.name, instance.primary_node, msg))
6609

    
6610
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6611

    
6612

    
6613
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6614
  """Utility function to remove an instance.
6615

6616
  """
6617
  logging.info("Removing block devices for instance %s", instance.name)
6618

    
6619
  if not _RemoveDisks(lu, instance):
6620
    if not ignore_failures:
6621
      raise errors.OpExecError("Can't remove instance's disks")
6622
    feedback_fn("Warning: can't remove instance's disks")
6623

    
6624
  logging.info("Removing instance %s out of cluster config", instance.name)
6625

    
6626
  lu.cfg.RemoveInstance(instance.name)
6627

    
6628
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6629
    "Instance lock removal conflict"
6630

    
6631
  # Remove lock for the instance
6632
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6633

    
6634

    
6635
class LUInstanceQuery(NoHooksLU):
6636
  """Logical unit for querying instances.
6637

6638
  """
6639
  # pylint: disable=W0142
6640
  REQ_BGL = False
6641

    
6642
  def CheckArguments(self):
6643
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6644
                             self.op.output_fields, self.op.use_locking)
6645

    
6646
  def ExpandNames(self):
6647
    self.iq.ExpandNames(self)
6648

    
6649
  def DeclareLocks(self, level):
6650
    self.iq.DeclareLocks(self, level)
6651

    
6652
  def Exec(self, feedback_fn):
6653
    return self.iq.OldStyleQuery(self)
6654

    
6655

    
6656
class LUInstanceFailover(LogicalUnit):
6657
  """Failover an instance.
6658

6659
  """
6660
  HPATH = "instance-failover"
6661
  HTYPE = constants.HTYPE_INSTANCE
6662
  REQ_BGL = False
6663

    
6664
  def CheckArguments(self):
6665
    """Check the arguments.
6666

6667
    """
6668
    self.iallocator = getattr(self.op, "iallocator", None)
6669
    self.target_node = getattr(self.op, "target_node", None)
6670

    
6671
  def ExpandNames(self):
6672
    self._ExpandAndLockInstance()
6673

    
6674
    if self.op.target_node is not None:
6675
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6676

    
6677
    self.needed_locks[locking.LEVEL_NODE] = []
6678
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6679

    
6680
    ignore_consistency = self.op.ignore_consistency
6681
    shutdown_timeout = self.op.shutdown_timeout
6682
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6683
                                       cleanup=False,
6684
                                       failover=True,
6685
                                       ignore_consistency=ignore_consistency,
6686
                                       shutdown_timeout=shutdown_timeout)
6687
    self.tasklets = [self._migrater]
6688

    
6689
  def DeclareLocks(self, level):
6690
    if level == locking.LEVEL_NODE:
6691
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6692
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6693
        if self.op.target_node is None:
6694
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6695
        else:
6696
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6697
                                                   self.op.target_node]
6698
        del self.recalculate_locks[locking.LEVEL_NODE]
6699
      else:
6700
        self._LockInstancesNodes()
6701

    
6702
  def BuildHooksEnv(self):
6703
    """Build hooks env.
6704

6705
    This runs on master, primary and secondary nodes of the instance.
6706

6707
    """
6708
    instance = self._migrater.instance
6709
    source_node = instance.primary_node
6710
    target_node = self.op.target_node
6711
    env = {
6712
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6713
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6714
      "OLD_PRIMARY": source_node,
6715
      "NEW_PRIMARY": target_node,
6716
      }
6717

    
6718
    if instance.disk_template in constants.DTS_INT_MIRROR:
6719
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6720
      env["NEW_SECONDARY"] = source_node
6721
    else:
6722
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6723

    
6724
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6725

    
6726
    return env
6727

    
6728
  def BuildHooksNodes(self):
6729
    """Build hooks nodes.
6730

6731
    """
6732
    instance = self._migrater.instance
6733
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6734
    return (nl, nl + [instance.primary_node])
6735

    
6736

    
6737
class LUInstanceMigrate(LogicalUnit):
6738
  """Migrate an instance.
6739

6740
  This is migration without shutting down, compared to the failover,
6741
  which is done with shutdown.
6742

6743
  """
6744
  HPATH = "instance-migrate"
6745
  HTYPE = constants.HTYPE_INSTANCE
6746
  REQ_BGL = False
6747

    
6748
  def ExpandNames(self):
6749
    self._ExpandAndLockInstance()
6750

    
6751
    if self.op.target_node is not None:
6752
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

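    # Each disk is assembled on the target node and its data is then streamed
    # from the source node via blockdev_export; the loop aborts on the first
    # failure.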
    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migrating instances
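    # (one single-opcode job per primary instance on this node)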
    jobs = [
7037
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7038
                                 mode=self.op.mode,
7039
                                 live=self.op.live,
7040
                                 iallocator=self.op.iallocator,
7041
                                 target_node=self.op.target_node)]
7042
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7043
      ]
7044

    
7045
    # TODO: Run iallocator in this opcode and pass correct placement options to
7046
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7047
    # running the iallocator and the actual migration, a good consistency model
7048
    # will have to be found.
7049

    
7050
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7051
            frozenset([self.op.node_name]))
7052

    
7053
    return ResultWithJobs(jobs)
7054
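
# The value returned above is a list of single-opcode jobs, one per primary
# instance of the evacuated node, so every migration is submitted and can
# fail independently.  A minimal, self-contained sketch of that shape; the
# opcode factory is a caller-supplied stand-in rather than a real opcode
# class.
def _ExampleNodeMigrateJobs(instance_names, make_migrate_op):
  """Illustrative sketch: one single-opcode job per instance to migrate.

  """
  return [[make_migrate_op(instance_name=name)] for name in instance_names]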

    
7055

    
7056
class TLMigrateInstance(Tasklet):
7057
  """Tasklet class for instance migration.
7058

7059
  @type live: boolean
7060
  @ivar live: whether the migration will be done live or non-live;
7061
      this variable is initialized only after CheckPrereq has run
7062
  @type cleanup: boolean
7063
  @ivar cleanup: Whether we clean up after a failed migration
7064
  @type iallocator: string
7065
  @ivar iallocator: The iallocator used to determine target_node
7066
  @type target_node: string
7067
  @ivar target_node: If given, the target_node to reallocate the instance to
7068
  @type failover: boolean
7069
  @ivar failover: Whether operation results in failover or migration
7070
  @type fallback: boolean
7071
  @ivar fallback: Whether fallback to failover is allowed if migration is not
7072
                  possible
7073
  @type ignore_consistency: boolean
7074
  @ivar ignore_consistency: Whether we should ignore consistency between source
7075
                            and target node
7076
  @type shutdown_timeout: int
7077
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7078

7079
  """
7080
  def __init__(self, lu, instance_name, cleanup=False,
7081
               failover=False, fallback=False,
7082
               ignore_consistency=False,
7083
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7084
    """Initializes this class.
7085

7086
    """
7087
    Tasklet.__init__(self, lu)
7088

    
7089
    # Parameters
7090
    self.instance_name = instance_name
7091
    self.cleanup = cleanup
7092
    self.live = False # will be overridden later
7093
    self.failover = failover
7094
    self.fallback = fallback
7095
    self.ignore_consistency = ignore_consistency
7096
    self.shutdown_timeout = shutdown_timeout
7097

    
7098
  def CheckPrereq(self):
7099
    """Check prerequisites.
7100

7101
    This checks that the instance is in the cluster.
7102

7103
    """
7104
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7105
    instance = self.cfg.GetInstanceInfo(instance_name)
7106
    assert instance is not None
7107
    self.instance = instance
7108

    
7109
    if (not self.cleanup and not instance.admin_up and not self.failover and
7110
        self.fallback):
7111
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7112
                      " to failover")
7113
      self.failover = True
7114

    
7115
    if instance.disk_template not in constants.DTS_MIRRORED:
7116
      if self.failover:
7117
        text = "failovers"
7118
      else:
7119
        text = "migrations"
7120
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7121
                                 " %s" % (instance.disk_template, text),
7122
                                 errors.ECODE_STATE)
7123

    
7124
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7125
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7126

    
7127
      if self.lu.op.iallocator:
7128
        self._RunAllocator()
7129
      else:
7130
        # We set self.target_node as it is required by
7131
        # BuildHooksEnv
7132
        self.target_node = self.lu.op.target_node
7133

    
7134
      # self.target_node is already populated, either directly or by the
7135
      # iallocator run
7136
      target_node = self.target_node
7137
      if self.target_node == instance.primary_node:
7138
        raise errors.OpPrereqError("Cannot migrate instance %s"
7139
                                   " to its primary (%s)" %
7140
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_STATE)
7141

    
7142
      if len(self.lu.tasklets) == 1:
7143
        # It is safe to release locks only when we're the only tasklet
7144
        # in the LU
7145
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7146
                      keep=[instance.primary_node, self.target_node])
7147

    
7148
    else:
7149
      secondary_nodes = instance.secondary_nodes
7150
      if not secondary_nodes:
7151
        raise errors.ConfigurationError("No secondary node but using"
7152
                                        " %s disk template" %
7153
                                        instance.disk_template)
7154
      target_node = secondary_nodes[0]
7155
      if self.lu.op.iallocator or (self.lu.op.target_node and
7156
                                   self.lu.op.target_node != target_node):
7157
        if self.failover:
7158
          text = "failed over"
7159
        else:
7160
          text = "migrated"
7161
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7162
                                   " be %s to arbitrary nodes"
7163
                                   " (neither an iallocator nor a target"
7164
                                   " node can be passed)" %
7165
                                   (instance.disk_template, text),
7166
                                   errors.ECODE_INVAL)
7167

    
7168
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7169

    
7170
    # check memory requirements on the secondary node
7171
    if not self.cleanup and (not self.failover or instance.admin_up):
7172
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7173
                           instance.name, i_be[constants.BE_MEMORY],
7174
                           instance.hypervisor)
7175
    else:
7176
      self.lu.LogInfo("Not checking memory on the secondary node as"
7177
                      " instance will not be started")
7178

    
7179
    # check bridge existence
7180
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7181

    
7182
    if not self.cleanup:
7183
      _CheckNodeNotDrained(self.lu, target_node)
7184
      if not self.failover:
7185
        result = self.rpc.call_instance_migratable(instance.primary_node,
7186
                                                   instance)
7187
        if result.fail_msg and self.fallback:
7188
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7189
                          " failover")
7190
          self.failover = True
7191
        else:
7192
          result.Raise("Can't migrate, please use failover",
7193
                       prereq=True, ecode=errors.ECODE_STATE)
7194

    
7195
    assert not (self.failover and self.cleanup)
7196

    
7197
    if not self.failover:
7198
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7199
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7200
                                   " parameters are accepted",
7201
                                   errors.ECODE_INVAL)
7202
      if self.lu.op.live is not None:
7203
        if self.lu.op.live:
7204
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7205
        else:
7206
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7207
        # reset the 'live' parameter to None so that repeated
7208
        # invocations of CheckPrereq do not raise an exception
7209
        self.lu.op.live = None
7210
      elif self.lu.op.mode is None:
7211
        # read the default value from the hypervisor
7212
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7213
                                                skip_globals=False)
7214
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7215

    
7216
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7217
    else:
7218
      # Failover is never live
7219
      self.live = False
7220

    
7221
  def _RunAllocator(self):
7222
    """Run the allocator based on input opcode.
7223

7224
    """
7225
    ial = IAllocator(self.cfg, self.rpc,
7226
                     mode=constants.IALLOCATOR_MODE_RELOC,
7227
                     name=self.instance_name,
7228
                     # TODO See why hail breaks with a single node below
7229
                     relocate_from=[self.instance.primary_node,
7230
                                    self.instance.primary_node],
7231
                     )
7232

    
7233
    ial.Run(self.lu.op.iallocator)
7234

    
7235
    if not ial.success:
7236
      raise errors.OpPrereqError("Can't compute nodes using"
7237
                                 " iallocator '%s': %s" %
7238
                                 (self.lu.op.iallocator, ial.info),
7239
                                 errors.ECODE_NORES)
7240
    if len(ial.result) != ial.required_nodes:
7241
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7242
                                 " of nodes (%s), required %s" %
7243
                                 (self.lu.op.iallocator, len(ial.result),
7244
                                  ial.required_nodes), errors.ECODE_FAULT)
7245
    self.target_node = ial.result[0]
7246
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7247
                    self.instance_name, self.lu.op.iallocator,
7248
                    utils.CommaJoin(ial.result))
7249

    
7250
  def _WaitUntilSync(self):
7251
    """Poll with custom rpc for disk sync.
7252

7253
    This uses our own step-based rpc call.
7254

7255
    """
7256
    self.feedback_fn("* wait until resync is done")
7257
    all_done = False
7258
    while not all_done:
7259
      all_done = True
7260
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7261
                                            self.nodes_ip,
7262
                                            self.instance.disks)
7263
      min_percent = 100
7264
      for node, nres in result.items():
7265
        nres.Raise("Cannot resync disks on node %s" % node)
7266
        node_done, node_percent = nres.payload
7267
        all_done = all_done and node_done
7268
        if node_percent is not None:
7269
          min_percent = min(min_percent, node_percent)
7270
      if not all_done:
7271
        if min_percent < 100:
7272
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7273
        time.sleep(2)
7274

    
7275
  def _EnsureSecondary(self, node):
7276
    """Demote a node to secondary.
7277

7278
    """
7279
    self.feedback_fn("* switching node %s to secondary mode" % node)
7280

    
7281
    for dev in self.instance.disks:
7282
      self.cfg.SetDiskID(dev, node)
7283

    
7284
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7285
                                          self.instance.disks)
7286
    result.Raise("Cannot change disk to secondary on node %s" % node)
7287

    
7288
  def _GoStandalone(self):
7289
    """Disconnect from the network.
7290

7291
    """
7292
    self.feedback_fn("* changing into standalone mode")
7293
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7294
                                               self.instance.disks)
7295
    for node, nres in result.items():
7296
      nres.Raise("Cannot disconnect disks node %s" % node)
7297

    
7298
  def _GoReconnect(self, multimaster):
7299
    """Reconnect to the network.
7300

7301
    """
7302
    if multimaster:
7303
      msg = "dual-master"
7304
    else:
7305
      msg = "single-master"
7306
    self.feedback_fn("* changing disks into %s mode" % msg)
7307
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7308
                                           self.instance.disks,
7309
                                           self.instance.name, multimaster)
7310
    for node, nres in result.items():
7311
      nres.Raise("Cannot change disks config on node %s" % node)
7312

    
7313
  def _ExecCleanup(self):
7314
    """Try to cleanup after a failed migration.
7315

7316
    The cleanup is done by:
7317
      - check that the instance is running only on one node
7318
        (and update the config if needed)
7319
      - change disks on its secondary node to secondary
7320
      - wait until disks are fully synchronized
7321
      - disconnect from the network
7322
      - change disks into single-master mode
7323
      - wait again until disks are fully synchronized
7324

7325
    """
7326
    instance = self.instance
7327
    target_node = self.target_node
7328
    source_node = self.source_node
7329

    
7330
    # check running on only one node
7331
    self.feedback_fn("* checking where the instance actually runs"
7332
                     " (if this hangs, the hypervisor might be in"
7333
                     " a bad state)")
7334
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7335
    for node, result in ins_l.items():
7336
      result.Raise("Can't contact node %s" % node)
7337

    
7338
    runningon_source = instance.name in ins_l[source_node].payload
7339
    runningon_target = instance.name in ins_l[target_node].payload
7340

    
7341
    if runningon_source and runningon_target:
7342
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7343
                               " or the hypervisor is confused; you will have"
7344
                               " to ensure manually that it runs only on one"
7345
                               " and restart this operation")
7346

    
7347
    if not (runningon_source or runningon_target):
7348
      raise errors.OpExecError("Instance does not seem to be running at all;"
7349
                               " in this case it's safer to repair by"
7350
                               " running 'gnt-instance stop' to ensure disk"
7351
                               " shutdown, and then restarting it")
7352

    
7353
    if runningon_target:
7354
      # the migration has actually succeeded, we need to update the config
7355
      self.feedback_fn("* instance running on secondary node (%s),"
7356
                       " updating config" % target_node)
7357
      instance.primary_node = target_node
7358
      self.cfg.Update(instance, self.feedback_fn)
7359
      demoted_node = source_node
7360
    else:
7361
      self.feedback_fn("* instance confirmed to be running on its"
7362
                       " primary node (%s)" % source_node)
7363
      demoted_node = target_node
7364

    
7365
    if instance.disk_template in constants.DTS_INT_MIRROR:
7366
      self._EnsureSecondary(demoted_node)
7367
      try:
7368
        self._WaitUntilSync()
7369
      except errors.OpExecError:
7370
        # we ignore here errors, since if the device is standalone, it
7371
        # won't be able to sync
7372
        pass
7373
      self._GoStandalone()
7374
      self._GoReconnect(False)
7375
      self._WaitUntilSync()
7376

    
7377
    self.feedback_fn("* done")
7378

    
7379
  def _RevertDiskStatus(self):
7380
    """Try to revert the disk status after a failed migration.
7381

7382
    """
7383
    target_node = self.target_node
7384
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7385
      return
7386

    
7387
    try:
7388
      self._EnsureSecondary(target_node)
7389
      self._GoStandalone()
7390
      self._GoReconnect(False)
7391
      self._WaitUntilSync()
7392
    except errors.OpExecError, err:
7393
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7394
                         " please try to recover the instance manually;"
7395
                         " error '%s'" % str(err))
7396

    
7397
  def _AbortMigration(self):
7398
    """Call the hypervisor code to abort a started migration.
7399

7400
    """
7401
    instance = self.instance
7402
    target_node = self.target_node
7403
    migration_info = self.migration_info
7404

    
7405
    abort_result = self.rpc.call_finalize_migration(target_node,
7406
                                                    instance,
7407
                                                    migration_info,
7408
                                                    False)
7409
    abort_msg = abort_result.fail_msg
7410
    if abort_msg:
7411
      logging.error("Aborting migration failed on target node %s: %s",
7412
                    target_node, abort_msg)
7413
      # Don't raise an exception here, as we still have to try to revert the
7414
      # disk status, even if this step failed.
7415

    
7416
  def _ExecMigration(self):
7417
    """Migrate an instance.
7418

7419
    The migrate is done by:
7420
      - change the disks into dual-master mode
7421
      - wait until disks are fully synchronized again
7422
      - migrate the instance
7423
      - change disks on the new secondary node (the old primary) to secondary
7424
      - wait until disks are fully synchronized
7425
      - change disks into single-master mode
7426

7427
    """
7428
    instance = self.instance
7429
    target_node = self.target_node
7430
    source_node = self.source_node
7431

    
7432
    # Check for hypervisor version mismatch and warn the user.
7433
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7434
                                       None, self.instance.hypervisor)
7435
    src_info = nodeinfo[source_node]
7436
    dst_info = nodeinfo[target_node]
7437

    
7438
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7439
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7440
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7441
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7442
      if src_version != dst_version:
7443
        self.feedback_fn("* warning: hypervisor version mismatch between"
7444
                         " source (%s) and target (%s) node" %
7445
                         (src_version, dst_version))
7446

    
7447
    self.feedback_fn("* checking disk consistency between source and target")
7448
    for dev in instance.disks:
7449
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7450
        raise errors.OpExecError("Disk %s is degraded or not fully"
7451
                                 " synchronized on target node,"
7452
                                 " aborting migration" % dev.iv_name)
7453

    
7454
    # First get the migration information from the remote node
7455
    result = self.rpc.call_migration_info(source_node, instance)
7456
    msg = result.fail_msg
7457
    if msg:
7458
      log_err = ("Failed fetching source migration information from %s: %s" %
7459
                 (source_node, msg))
7460
      logging.error(log_err)
7461
      raise errors.OpExecError(log_err)
7462

    
7463
    self.migration_info = migration_info = result.payload
7464

    
7465
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7466
      # Then switch the disks to master/master mode
7467
      self._EnsureSecondary(target_node)
7468
      self._GoStandalone()
7469
      self._GoReconnect(True)
7470
      self._WaitUntilSync()
7471

    
7472
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7473
    result = self.rpc.call_accept_instance(target_node,
7474
                                           instance,
7475
                                           migration_info,
7476
                                           self.nodes_ip[target_node])
7477

    
7478
    msg = result.fail_msg
7479
    if msg:
7480
      logging.error("Instance pre-migration failed, trying to revert"
7481
                    " disk status: %s", msg)
7482
      self.feedback_fn("Pre-migration failed, aborting")
7483
      self._AbortMigration()
7484
      self._RevertDiskStatus()
7485
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7486
                               (instance.name, msg))
7487

    
7488
    self.feedback_fn("* migrating instance to %s" % target_node)
7489
    result = self.rpc.call_instance_migrate(source_node, instance,
7490
                                            self.nodes_ip[target_node],
7491
                                            self.live)
7492
    msg = result.fail_msg
7493
    if msg:
7494
      logging.error("Instance migration failed, trying to revert"
7495
                    " disk status: %s", msg)
7496
      self.feedback_fn("Migration failed, aborting")
7497
      self._AbortMigration()
7498
      self._RevertDiskStatus()
7499
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7500
                               (instance.name, msg))
7501

    
7502
    instance.primary_node = target_node
7503
    # distribute new instance config to the other nodes
7504
    self.cfg.Update(instance, self.feedback_fn)
7505

    
7506
    result = self.rpc.call_finalize_migration(target_node,
7507
                                              instance,
7508
                                              migration_info,
7509
                                              True)
7510
    msg = result.fail_msg
7511
    if msg:
7512
      logging.error("Instance migration succeeded, but finalization failed:"
7513
                    " %s", msg)
7514
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7515
                               msg)
7516

    
7517
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7518
      self._EnsureSecondary(source_node)
7519
      self._WaitUntilSync()
7520
      self._GoStandalone()
7521
      self._GoReconnect(False)
7522
      self._WaitUntilSync()
7523

    
7524
    self.feedback_fn("* done")
7525

    
7526
  def _ExecFailover(self):
7527
    """Failover an instance.
7528

7529
    The failover is done by shutting it down on its present node and
7530
    starting it on the secondary.
7531

7532
    """
7533
    instance = self.instance
7534
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7535

    
7536
    source_node = instance.primary_node
7537
    target_node = self.target_node
7538

    
7539
    if instance.admin_up:
7540
      self.feedback_fn("* checking disk consistency between source and target")
7541
      for dev in instance.disks:
7542
        # for drbd, these are drbd over lvm
7543
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7544
          if primary_node.offline:
7545
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7546
                             " target node %s" %
7547
                             (primary_node.name, dev.iv_name, target_node))
7548
          elif not self.ignore_consistency:
7549
            raise errors.OpExecError("Disk %s is degraded on target node,"
7550
                                     " aborting failover" % dev.iv_name)
7551
    else:
7552
      self.feedback_fn("* not checking disk consistency as instance is not"
7553
                       " running")
7554

    
7555
    self.feedback_fn("* shutting down instance on source node")
7556
    logging.info("Shutting down instance %s on node %s",
7557
                 instance.name, source_node)
7558

    
7559
    result = self.rpc.call_instance_shutdown(source_node, instance,
7560
                                             self.shutdown_timeout)
7561
    msg = result.fail_msg
7562
    if msg:
7563
      if self.ignore_consistency or primary_node.offline:
7564
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7565
                           " proceeding anyway; please make sure node"
7566
                           " %s is down; error details: %s",
7567
                           instance.name, source_node, source_node, msg)
7568
      else:
7569
        raise errors.OpExecError("Could not shutdown instance %s on"
7570
                                 " node %s: %s" %
7571
                                 (instance.name, source_node, msg))
7572

    
7573
    self.feedback_fn("* deactivating the instance's disks on source node")
7574
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7575
      raise errors.OpExecError("Can't shut down the instance's disks")
7576

    
7577
    instance.primary_node = target_node
7578
    # distribute new instance config to the other nodes
7579
    self.cfg.Update(instance, self.feedback_fn)
7580

    
7581
    # Only start the instance if it's marked as up
7582
    if instance.admin_up:
7583
      self.feedback_fn("* activating the instance's disks on target node %s" %
7584
                       target_node)
7585
      logging.info("Starting instance %s on node %s",
7586
                   instance.name, target_node)
7587

    
7588
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7589
                                           ignore_secondaries=True)
7590
      if not disks_ok:
7591
        _ShutdownInstanceDisks(self.lu, instance)
7592
        raise errors.OpExecError("Can't activate the instance's disks")
7593

    
7594
      self.feedback_fn("* starting the instance on the target node %s" %
7595
                       target_node)
7596
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7597
                                            False)
7598
      msg = result.fail_msg
7599
      if msg:
7600
        _ShutdownInstanceDisks(self.lu, instance)
7601
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7602
                                 (instance.name, target_node, msg))
7603

    
7604
  def Exec(self, feedback_fn):
7605
    """Perform the migration.
7606

7607
    """
7608
    self.feedback_fn = feedback_fn
7609
    self.source_node = self.instance.primary_node
7610

    
7611
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7612
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7613
      self.target_node = self.instance.secondary_nodes[0]
7614
      # Otherwise self.target_node has been populated either
7615
      # directly, or through an iallocator.
7616

    
7617
    self.all_nodes = [self.source_node, self.target_node]
7618
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7619
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7620

    
7621
    if self.failover:
7622
      feedback_fn("Failover instance %s" % self.instance.name)
7623
      self._ExecFailover()
7624
    else:
7625
      feedback_fn("Migrating instance %s" % self.instance.name)
7626

    
7627
      if self.cleanup:
7628
        return self._ExecCleanup()
7629
      else:
7630
        return self._ExecMigration()
7631

    
7632

    
7633
def _CreateBlockDev(lu, node, instance, device, force_create,
7634
                    info, force_open):
7635
  """Create a tree of block devices on a given node.
7636

7637
  If this device type has to be created on secondaries, create it and
7638
  all its children.
7639

7640
  If not, just recurse to children keeping the same 'force' value.
7641

7642
  @param lu: the lu on whose behalf we execute
7643
  @param node: the node on which to create the device
7644
  @type instance: L{objects.Instance}
7645
  @param instance: the instance which owns the device
7646
  @type device: L{objects.Disk}
7647
  @param device: the device to create
7648
  @type force_create: boolean
7649
  @param force_create: whether to force creation of this device; this
7650
      will be change to True whenever we find a device which has
7651
      CreateOnSecondary() attribute
7652
  @param info: the extra 'metadata' we should attach to the device
7653
      (this will be represented as an LVM tag)
7654
  @type force_open: boolean
7655
  @param force_open: this parameter will be passed to the
7656
      L{backend.BlockdevCreate} function where it specifies
7657
      whether we run on primary or not, and it affects both
7658
      the child assembly and the device's own Open() execution
7659

7660
  """
7661
  if device.CreateOnSecondary():
7662
    force_create = True
7663

    
7664
  if device.children:
7665
    for child in device.children:
7666
      _CreateBlockDev(lu, node, instance, child, force_create,
7667
                      info, force_open)
7668

    
7669
  if not force_create:
7670
    return
7671

    
7672
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7673
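
# A minimal sketch of the recursion implemented above: children are created
# first, and force_create turns (and stays) True as soon as a device in the
# tree reports CreateOnSecondary().  The device objects here are assumed to
# expose only C{children} and C{CreateOnSecondary()}; the actual creation is
# delegated to a caller-supplied callback.
def _ExampleWalkDeviceTree(device, force_create, create_fn):
  """Illustrative sketch of the _CreateBlockDev recursion.

  """
  if device.CreateOnSecondary():
    force_create = True
  for child in (device.children or []):
    _ExampleWalkDeviceTree(child, force_create, create_fn)
  if force_create:
    create_fn(device)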

    
7674

    
7675
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7676
  """Create a single block device on a given node.
7677

7678
  This will not recurse over children of the device, so they must be
7679
  created in advance.
7680

7681
  @param lu: the lu on whose behalf we execute
7682
  @param node: the node on which to create the device
7683
  @type instance: L{objects.Instance}
7684
  @param instance: the instance which owns the device
7685
  @type device: L{objects.Disk}
7686
  @param device: the device to create
7687
  @param info: the extra 'metadata' we should attach to the device
7688
      (this will be represented as a LVM tag)
7689
  @type force_open: boolean
7690
  @param force_open: this parameter will be passes to the
7691
      L{backend.BlockdevCreate} function where it specifies
7692
      whether we run on primary or not, and it affects both
7693
      the child assembly and the device own Open() execution
7694

7695
  """
7696
  lu.cfg.SetDiskID(device, node)
7697
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7698
                                       instance.name, force_open, info)
7699
  result.Raise("Can't create block device %s on"
7700
               " node %s for instance %s" % (device, node, instance.name))
7701
  if device.physical_id is None:
7702
    device.physical_id = result.payload
7703

    
7704

    
7705
def _GenerateUniqueNames(lu, exts):
7706
  """Generate a suitable LV name.
7707

7708
  This will generate unique logical volume names for the given suffixes.
7709

7710
  """
7711
  results = []
7712
  for val in exts:
7713
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7714
    results.append("%s%s" % (new_id, val))
7715
  return results
7716

    
7717

    
7718
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7719
                         iv_name, p_minor, s_minor):
7720
  """Generate a drbd8 device complete with its children.
7721

7722
  """
7723
  assert len(vgnames) == len(names) == 2
7724
  port = lu.cfg.AllocatePort()
7725
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7726
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7727
                          logical_id=(vgnames[0], names[0]))
7728
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7729
                          logical_id=(vgnames[1], names[1]))
7730
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7731
                          logical_id=(primary, secondary, port,
7732
                                      p_minor, s_minor,
7733
                                      shared_secret),
7734
                          children=[dev_data, dev_meta],
7735
                          iv_name=iv_name)
7736
  return drbd_dev
7737

    
7738

    
7739
def _GenerateDiskTemplate(lu, template_name,
7740
                          instance_name, primary_node,
7741
                          secondary_nodes, disk_info,
7742
                          file_storage_dir, file_driver,
7743
                          base_index, feedback_fn):
7744
  """Generate the entire disk layout for a given template type.
7745

7746
  """
7747
  #TODO: compute space requirements
7748

    
7749
  vgname = lu.cfg.GetVGName()
7750
  disk_count = len(disk_info)
7751
  disks = []
7752
  if template_name == constants.DT_DISKLESS:
7753
    pass
7754
  elif template_name == constants.DT_PLAIN:
7755
    if len(secondary_nodes) != 0:
7756
      raise errors.ProgrammerError("Wrong template configuration")
7757

    
7758
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7759
                                      for i in range(disk_count)])
7760
    for idx, disk in enumerate(disk_info):
7761
      disk_index = idx + base_index
7762
      vg = disk.get(constants.IDISK_VG, vgname)
7763
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7764
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7765
                              size=disk[constants.IDISK_SIZE],
7766
                              logical_id=(vg, names[idx]),
7767
                              iv_name="disk/%d" % disk_index,
7768
                              mode=disk[constants.IDISK_MODE])
7769
      disks.append(disk_dev)
7770
  elif template_name == constants.DT_DRBD8:
7771
    if len(secondary_nodes) != 1:
7772
      raise errors.ProgrammerError("Wrong template configuration")
7773
    remote_node = secondary_nodes[0]
7774
    minors = lu.cfg.AllocateDRBDMinor(
7775
      [primary_node, remote_node] * len(disk_info), instance_name)
7776

    
7777
    names = []
7778
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7779
                                               for i in range(disk_count)]):
7780
      names.append(lv_prefix + "_data")
7781
      names.append(lv_prefix + "_meta")
7782
    for idx, disk in enumerate(disk_info):
7783
      disk_index = idx + base_index
7784
      data_vg = disk.get(constants.IDISK_VG, vgname)
7785
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7786
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7787
                                      disk[constants.IDISK_SIZE],
7788
                                      [data_vg, meta_vg],
7789
                                      names[idx * 2:idx * 2 + 2],
7790
                                      "disk/%d" % disk_index,
7791
                                      minors[idx * 2], minors[idx * 2 + 1])
7792
      disk_dev.mode = disk[constants.IDISK_MODE]
7793
      disks.append(disk_dev)
7794
  elif template_name == constants.DT_FILE:
7795
    if len(secondary_nodes) != 0:
7796
      raise errors.ProgrammerError("Wrong template configuration")
7797

    
7798
    opcodes.RequireFileStorage()
7799

    
7800
    for idx, disk in enumerate(disk_info):
7801
      disk_index = idx + base_index
7802
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7803
                              size=disk[constants.IDISK_SIZE],
7804
                              iv_name="disk/%d" % disk_index,
7805
                              logical_id=(file_driver,
7806
                                          "%s/disk%d" % (file_storage_dir,
7807
                                                         disk_index)),
7808
                              mode=disk[constants.IDISK_MODE])
7809
      disks.append(disk_dev)
7810
  elif template_name == constants.DT_SHARED_FILE:
7811
    if len(secondary_nodes) != 0:
7812
      raise errors.ProgrammerError("Wrong template configuration")
7813

    
7814
    opcodes.RequireSharedFileStorage()
7815

    
7816
    for idx, disk in enumerate(disk_info):
7817
      disk_index = idx + base_index
7818
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7819
                              size=disk[constants.IDISK_SIZE],
7820
                              iv_name="disk/%d" % disk_index,
7821
                              logical_id=(file_driver,
7822
                                          "%s/disk%d" % (file_storage_dir,
7823
                                                         disk_index)),
7824
                              mode=disk[constants.IDISK_MODE])
7825
      disks.append(disk_dev)
7826
  elif template_name == constants.DT_BLOCK:
7827
    if len(secondary_nodes) != 0:
7828
      raise errors.ProgrammerError("Wrong template configuration")
7829

    
7830
    for idx, disk in enumerate(disk_info):
7831
      disk_index = idx + base_index
7832
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7833
                              size=disk[constants.IDISK_SIZE],
7834
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7835
                                          disk[constants.IDISK_ADOPT]),
7836
                              iv_name="disk/%d" % disk_index,
7837
                              mode=disk[constants.IDISK_MODE])
7838
      disks.append(disk_dev)
7839

    
7840
  else:
7841
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7842
  return disks
7843
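
# The disk_info argument above is a list of dicts keyed by the IDISK_*
# constants.  The helper below is a minimal sketch of what a two-disk
# DT_DRBD8 request might look like; the sizes and volume group names are
# made-up example values, and DISK_RDWR is assumed to be the usual
# read-write access mode constant.
def _ExampleDrbd8DiskInfo():
  """Illustrative sketch: the disk_info shape consumed above.

  """
  return [
    {constants.IDISK_SIZE: 10240,
     constants.IDISK_MODE: constants.DISK_RDWR,
     constants.IDISK_VG: "xenvg"},
    {constants.IDISK_SIZE: 2048,
     constants.IDISK_MODE: constants.DISK_RDWR,
     constants.IDISK_VG: "xenvg",
     constants.IDISK_METAVG: "metavg"},
    ]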

    
7844

    
7845
def _GetInstanceInfoText(instance):
7846
  """Compute that text that should be added to the disk's metadata.
7847

7848
  """
7849
  return "originstname+%s" % instance.name
7850

    
7851

    
7852
def _CalcEta(time_taken, written, total_size):
7853
  """Calculates the ETA based on size written and total size.
7854

7855
  @param time_taken: The time taken so far
7856
  @param written: amount written so far
7857
  @param total_size: The total size of data to be written
7858
  @return: The remaining time in seconds
7859

7860
  """
7861
  avg_time = time_taken / float(written)
7862
  return (total_size - written) * avg_time
7863
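
# A worked example of the formula above, assuming a constant write rate:
# after 30 seconds, 256 MiB out of 1024 MiB have been written, so the
# average cost is 30/256 seconds per MiB and the remaining 768 MiB need
# 768 * (30/256) = 90 more seconds.  The sample numbers are made up for
# illustration; the helper has no Ganeti dependencies.
def _ExampleCalcEta():
  """Illustrative sketch: the ETA arithmetic performed by _CalcEta.

  """
  time_taken, written, total_size = 30.0, 256, 1024
  avg_time = time_taken / float(written)
  eta = (total_size - written) * avg_time
  assert eta == 90.0
  return eta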

    
7864

    
7865
def _WipeDisks(lu, instance):
7866
  """Wipes instance disks.
7867

7868
  @type lu: L{LogicalUnit}
7869
  @param lu: the logical unit on whose behalf we execute
7870
  @type instance: L{objects.Instance}
7871
  @param instance: the instance whose disks we should wipe
7872
  @return: the success of the wipe
7873

7874
  """
7875
  node = instance.primary_node
7876

    
7877
  for device in instance.disks:
7878
    lu.cfg.SetDiskID(device, node)
7879

    
7880
  logging.info("Pause sync of instance %s disks", instance.name)
7881
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7882

    
7883
  for idx, success in enumerate(result.payload):
7884
    if not success:
7885
      logging.warn("pause-sync of instance %s for disks %d failed",
7886
                   instance.name, idx)
7887

    
7888
  try:
7889
    for idx, device in enumerate(instance.disks):
7890
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7891
      # at most MAX_WIPE_CHUNK
7892
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7893
                            constants.MIN_WIPE_CHUNK_PERCENT)
7894
      # we _must_ make this an int, otherwise rounding errors will
7895
      # occur
7896
      wipe_chunk_size = int(wipe_chunk_size)
7897

    
7898
      lu.LogInfo("* Wiping disk %d", idx)
7899
      logging.info("Wiping disk %d for instance %s, node %s using"
7900
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7901

    
7902
      offset = 0
7903
      size = device.size
7904
      last_output = 0
7905
      start_time = time.time()
7906

    
7907
      while offset < size:
7908
        wipe_size = min(wipe_chunk_size, size - offset)
7909
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7910
                      idx, offset, wipe_size)
7911
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7912
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7913
                     (idx, offset, wipe_size))
7914
        now = time.time()
7915
        offset += wipe_size
7916
        if now - last_output >= 60:
7917
          eta = _CalcEta(now - start_time, offset, size)
7918
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7919
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7920
          last_output = now
7921
  finally:
7922
    logging.info("Resume sync of instance %s disks", instance.name)
7923

    
7924
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7925

    
7926
    for idx, success in enumerate(result.payload):
7927
      if not success:
7928
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7929
                      " look at the status and troubleshoot the issue", idx)
7930
        logging.warn("resume-sync of instance %s for disks %d failed",
7931
                     instance.name, idx)
7932
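
# A worked example of the chunk size rule used above, assuming
# MIN_WIPE_CHUNK_PERCENT is 10 and MAX_WIPE_CHUNK is 1024 MiB (the
# authoritative values live in constants.py): a 2048 MiB disk is wiped in
# int(2048 / 100.0 * 10) = 204 MiB chunks, while a 100 GiB disk is capped
# at 1024 MiB per chunk.  This helper has no Ganeti dependencies.
def _ExampleWipeChunkSize(disk_size, min_percent=10, max_chunk=1024):
  """Illustrative sketch of the wipe chunk size computation in _WipeDisks.

  """
  # a percentage of the disk, but never more than the configured maximum;
  # truncated to an int to avoid rounding errors in the offset loop
  return int(min(max_chunk, disk_size / 100.0 * min_percent))

# _ExampleWipeChunkSize(2048) == 204; _ExampleWipeChunkSize(102400) == 1024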

    
7933

    
7934
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7935
  """Create all disks for an instance.
7936

7937
  This abstracts away some work from AddInstance.
7938

7939
  @type lu: L{LogicalUnit}
7940
  @param lu: the logical unit on whose behalf we execute
7941
  @type instance: L{objects.Instance}
7942
  @param instance: the instance whose disks we should create
7943
  @type to_skip: list
7944
  @param to_skip: list of indices to skip
7945
  @type target_node: string
7946
  @param target_node: if passed, overrides the target node for creation
7947
  @rtype: boolean
7948
  @return: the success of the creation
7949

7950
  """
7951
  info = _GetInstanceInfoText(instance)
7952
  if target_node is None:
7953
    pnode = instance.primary_node
7954
    all_nodes = instance.all_nodes
7955
  else:
7956
    pnode = target_node
7957
    all_nodes = [pnode]
7958

    
7959
  if instance.disk_template in constants.DTS_FILEBASED:
7960
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7961
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7962

    
7963
    result.Raise("Failed to create directory '%s' on"
7964
                 " node %s" % (file_storage_dir, pnode))
7965

    
7966
  # Note: this needs to be kept in sync with adding of disks in
7967
  # LUInstanceSetParams
7968
  for idx, device in enumerate(instance.disks):
7969
    if to_skip and idx in to_skip:
7970
      continue
7971
    logging.info("Creating volume %s for instance %s",
7972
                 device.iv_name, instance.name)
7973
    #HARDCODE
7974
    for node in all_nodes:
7975
      f_create = node == pnode
7976
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7977

    
7978

    
7979
def _RemoveDisks(lu, instance, target_node=None):
7980
  """Remove all disks for an instance.
7981

7982
  This abstracts away some work from `AddInstance()` and
7983
  `RemoveInstance()`. Note that in case some of the devices couldn't
7984
  be removed, the removal will continue with the other ones (compare
7985
  with `_CreateDisks()`).
7986

7987
  @type lu: L{LogicalUnit}
7988
  @param lu: the logical unit on whose behalf we execute
7989
  @type instance: L{objects.Instance}
7990
  @param instance: the instance whose disks we should remove
7991
  @type target_node: string
7992
  @param target_node: used to override the node on which to remove the disks
7993
  @rtype: boolean
7994
  @return: the success of the removal
7995

7996
  """
7997
  logging.info("Removing block devices for instance %s", instance.name)
7998

    
7999
  all_result = True
8000
  for device in instance.disks:
8001
    if target_node:
8002
      edata = [(target_node, device)]
8003
    else:
8004
      edata = device.ComputeNodeTree(instance.primary_node)
8005
    for node, disk in edata:
8006
      lu.cfg.SetDiskID(disk, node)
8007
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8008
      if msg:
8009
        lu.LogWarning("Could not remove block device %s on node %s,"
8010
                      " continuing anyway: %s", device.iv_name, node, msg)
8011
        all_result = False
8012

    
8013
    # if this is a DRBD disk, return its port to the pool
8014
    if device.dev_type in constants.LDS_DRBD:
8015
      tcp_port = device.logical_id[2]
8016
      lu.cfg.AddTcpUdpPort(tcp_port)
8017

    
8018
  if instance.disk_template == constants.DT_FILE:
8019
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8020
    if target_node:
8021
      tgt = target_node
8022
    else:
8023
      tgt = instance.primary_node
8024
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8025
    if result.fail_msg:
8026
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8027
                    file_storage_dir, tgt, result.fail_msg)
8028
      all_result = False
8029

    
8030
  return all_result
8031

    
8032

    
8033
def _ComputeDiskSizePerVG(disk_template, disks):
8034
  """Compute disk size requirements in the volume group
8035

8036
  """
8037
  def _compute(disks, payload):
8038
    """Universal algorithm.
8039

8040
    """
8041
    vgs = {}
8042
    for disk in disks:
8043
      vgs[disk[constants.IDISK_VG]] = \
8044
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8045

    
8046
    return vgs
8047

    
8048
  # Required free disk space as a function of disk and swap space
8049
  req_size_dict = {
8050
    constants.DT_DISKLESS: {},
8051
    constants.DT_PLAIN: _compute(disks, 0),
8052
    # 128 MB are added for drbd metadata for each disk
8053
    constants.DT_DRBD8: _compute(disks, 128),
8054
    constants.DT_FILE: {},
8055
    constants.DT_SHARED_FILE: {},
8056
  }
8057

    
8058
  if disk_template not in req_size_dict:
8059
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8060
                                 " is unknown" % disk_template)
8061

    
8062
  return req_size_dict[disk_template]
8063
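
# A worked example of the per-VG accumulation above: two 10 GiB DRBD disks
# in volume group "xenvg" and one 5 GiB disk in "fastvg" require
# {"xenvg": 2 * (10240 + 128), "fastvg": 5120 + 128} MiB, i.e. the 128 MiB
# DRBD metadata overhead is charged once per disk.  The VG names and sizes
# are made up for illustration, and plain strings stand in for the IDISK_*
# keys.
def _ExampleDiskSizePerVG():
  """Illustrative sketch of _ComputeDiskSizePerVG for DT_DRBD8.

  """
  disks = [{"vg": "xenvg", "size": 10240},
           {"vg": "xenvg", "size": 10240},
           {"vg": "fastvg", "size": 5120}]
  vgs = {}
  for disk in disks:
    vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + 128
  assert vgs == {"xenvg": 20736, "fastvg": 5248}
  return vgs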

    
8064

    
8065
def _ComputeDiskSize(disk_template, disks):
8066
  """Compute disk size requirements in the volume group
8067

8068
  """
8069
  # Required free disk space as a function of disk and swap space
8070
  req_size_dict = {
8071
    constants.DT_DISKLESS: None,
8072
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8073
    # 128 MB are added for drbd metadata for each disk
8074
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8075
    constants.DT_FILE: None,
8076
    constants.DT_SHARED_FILE: 0,
8077
    constants.DT_BLOCK: 0,
8078
  }
8079

    
8080
  if disk_template not in req_size_dict:
8081
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8082
                                 " is unknown" % disk_template)
8083

    
8084
  return req_size_dict[disk_template]
8085
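
# A worked example of the table above: a DT_DRBD8 instance with disks of
# 10240 MiB and 2048 MiB needs (10240 + 128) + (2048 + 128) = 12544 MiB of
# free space, while the same disks as DT_PLAIN need only 12288 MiB.  The
# sizes are made up for illustration; the helper has no Ganeti
# dependencies.
def _ExampleComputeDiskSize():
  """Illustrative sketch of _ComputeDiskSize for DT_DRBD8 vs DT_PLAIN.

  """
  sizes = [10240, 2048]
  drbd = sum(s + 128 for s in sizes)
  plain = sum(sizes)
  assert (drbd, plain) == (12544, 12288)
  return (drbd, plain)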

    
8086

    
8087
def _FilterVmNodes(lu, nodenames):
8088
  """Filters out non-vm_capable nodes from a list.
8089

8090
  @type lu: L{LogicalUnit}
8091
  @param lu: the logical unit for which we check
8092
  @type nodenames: list
8093
  @param nodenames: the list of nodes on which we should check
8094
  @rtype: list
8095
  @return: the list of vm-capable nodes
8096

8097
  """
8098
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8099
  return [name for name in nodenames if name not in non_vm_nodes]
8100

    
8101

    
8102
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8103
  """Hypervisor parameter validation.
8104

8105
  This function abstracts the hypervisor parameter validation to be
8106
  used in both instance create and instance modify.
8107

8108
  @type lu: L{LogicalUnit}
8109
  @param lu: the logical unit for which we check
8110
  @type nodenames: list
8111
  @param nodenames: the list of nodes on which we should check
8112
  @type hvname: string
8113
  @param hvname: the name of the hypervisor we should use
8114
  @type hvparams: dict
8115
  @param hvparams: the parameters which we need to check
8116
  @raise errors.OpPrereqError: if the parameters are not valid
8117

8118
  """
8119
  nodenames = _FilterVmNodes(lu, nodenames)
8120
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8121
                                                  hvname,
8122
                                                  hvparams)
8123
  for node in nodenames:
8124
    info = hvinfo[node]
8125
    if info.offline:
8126
      continue
8127
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8128

    
8129

    
8130
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8131
  """OS parameters validation.
8132

8133
  @type lu: L{LogicalUnit}
8134
  @param lu: the logical unit for which we check
8135
  @type required: boolean
8136
  @param required: whether the validation should fail if the OS is not
8137
      found
8138
  @type nodenames: list
8139
  @param nodenames: the list of nodes on which we should check
8140
  @type osname: string
8141
  @param osname: the name of the OS we should use
8142
  @type osparams: dict
8143
  @param osparams: the parameters which we need to check
8144
  @raise errors.OpPrereqError: if the parameters are not valid
8145

8146
  """
8147
  nodenames = _FilterVmNodes(lu, nodenames)
8148
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8149
                                   [constants.OS_VALIDATE_PARAMETERS],
8150
                                   osparams)
8151
  for node, nres in result.items():
8152
    # we don't check for offline cases since this should be run only
8153
    # against the master node and/or an instance's nodes
8154
    nres.Raise("OS Parameters validation failed on node %s" % node)
8155
    if not nres.payload:
8156
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8157
                 osname, node)
8158

    
8159

    
8160
class LUInstanceCreate(LogicalUnit):
8161
  """Create an instance.
8162

8163
  """
8164
  HPATH = "instance-add"
8165
  HTYPE = constants.HTYPE_INSTANCE
8166
  REQ_BGL = False
8167

    
8168
  def CheckArguments(self):
8169
    """Check arguments.
8170

8171
    """
8172
    # do not require name_check to ease forward/backward compatibility
8173
    # for tools
8174
    if self.op.no_install and self.op.start:
8175
      self.LogInfo("No-installation mode selected, disabling startup")
8176
      self.op.start = False
8177
    # validate/normalize the instance name
8178
    self.op.instance_name = \
8179
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8180

    
8181
    if self.op.ip_check and not self.op.name_check:
8182
      # TODO: make the ip check more flexible and not depend on the name check
8183
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8184
                                 " check", errors.ECODE_INVAL)
8185

    
8186
    # check nics' parameter names
8187
    for nic in self.op.nics:
8188
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8189

    
8190
    # check disks' parameter names and consistent adopt/no-adopt strategy
8191
    has_adopt = has_no_adopt = False
8192
    for disk in self.op.disks:
8193
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8194
      if constants.IDISK_ADOPT in disk:
8195
        has_adopt = True
8196
      else:
8197
        has_no_adopt = True
8198
    if has_adopt and has_no_adopt:
8199
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8200
                                 errors.ECODE_INVAL)
8201
    if has_adopt:
8202
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8203
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8204
                                   " '%s' disk template" %
8205
                                   self.op.disk_template,
8206
                                   errors.ECODE_INVAL)
8207
      if self.op.iallocator is not None:
8208
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8209
                                   " iallocator script", errors.ECODE_INVAL)
8210
      if self.op.mode == constants.INSTANCE_IMPORT:
8211
        raise errors.OpPrereqError("Disk adoption not allowed for"
8212
                                   " instance import", errors.ECODE_INVAL)
8213
    else:
8214
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8215
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8216
                                   " but no 'adopt' parameter given" %
8217
                                   self.op.disk_template,
8218
                                   errors.ECODE_INVAL)
8219

    
8220
    self.adopt_disks = has_adopt
8221

    
8222
    # instance name verification
8223
    if self.op.name_check:
8224
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8225
      self.op.instance_name = self.hostname1.name
8226
      # used in CheckPrereq for ip ping check
8227
      self.check_ip = self.hostname1.ip
8228
    else:
8229
      self.check_ip = None
8230

    
8231
    # file storage checks
8232
    if (self.op.file_driver and
8233
        not self.op.file_driver in constants.FILE_DRIVER):
8234
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8235
                                 self.op.file_driver, errors.ECODE_INVAL)
8236

    
8237
    if self.op.disk_template == constants.DT_FILE:
8238
      opcodes.RequireFileStorage()
8239
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8240
      opcodes.RequireSharedFileStorage()
8241

    
8242
    ### Node/iallocator related checks
8243
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8244

    
8245
    if self.op.pnode is not None:
8246
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8247
        if self.op.snode is None:
8248
          raise errors.OpPrereqError("The networked disk templates need"
8249
                                     " a mirror node", errors.ECODE_INVAL)
8250
      elif self.op.snode:
8251
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8252
                        " template")
8253
        self.op.snode = None
8254

    
8255
    self._cds = _GetClusterDomainSecret()
8256

    
8257
    if self.op.mode == constants.INSTANCE_IMPORT:
8258
      # On import force_variant must be True, because if we forced it at
8259
      # initial install, our only chance when importing it back is that it
8260
      # works again!
8261
      self.op.force_variant = True
8262

    
8263
      if self.op.no_install:
8264
        self.LogInfo("No-installation mode has no effect during import")
8265

    
8266
    elif self.op.mode == constants.INSTANCE_CREATE:
8267
      if self.op.os_type is None:
8268
        raise errors.OpPrereqError("No guest OS specified",
8269
                                   errors.ECODE_INVAL)
8270
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8271
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8272
                                   " installation" % self.op.os_type,
8273
                                   errors.ECODE_STATE)
8274
      if self.op.disk_template is None:
8275
        raise errors.OpPrereqError("No disk template specified",
8276
                                   errors.ECODE_INVAL)
8277

    
8278
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8279
      # Check handshake to ensure both clusters have the same domain secret
8280
      src_handshake = self.op.source_handshake
8281
      if not src_handshake:
8282
        raise errors.OpPrereqError("Missing source handshake",
8283
                                   errors.ECODE_INVAL)
8284

    
8285
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8286
                                                           src_handshake)
8287
      if errmsg:
8288
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8289
                                   errors.ECODE_INVAL)
8290

    
8291
      # Load and check source CA
8292
      self.source_x509_ca_pem = self.op.source_x509_ca
8293
      if not self.source_x509_ca_pem:
8294
        raise errors.OpPrereqError("Missing source X509 CA",
8295
                                   errors.ECODE_INVAL)
8296

    
8297
      try:
8298
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8299
                                                    self._cds)
8300
      except OpenSSL.crypto.Error, err:
8301
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8302
                                   (err, ), errors.ECODE_INVAL)
8303

    
8304
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8305
      if errcode is not None:
8306
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8307
                                   errors.ECODE_INVAL)
8308

    
8309
      self.source_x509_ca = cert
8310

    
8311
      src_instance_name = self.op.source_instance_name
8312
      if not src_instance_name:
8313
        raise errors.OpPrereqError("Missing source instance name",
8314
                                   errors.ECODE_INVAL)
8315

    
8316
      self.source_instance_name = \
8317
          netutils.GetHostname(name=src_instance_name).name
8318

    
8319
    else:
8320
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8321
                                 self.op.mode, errors.ECODE_INVAL)
8322

    
8323
  def ExpandNames(self):
8324
    """ExpandNames for CreateInstance.
8325

8326
    Figure out the right locks for instance creation.
8327

8328
    """
8329
    self.needed_locks = {}
8330

    
8331
    instance_name = self.op.instance_name
8332
    # this is just a preventive check, but someone might still add this
8333
    # instance in the meantime, and creation will fail at lock-add time
8334
    if instance_name in self.cfg.GetInstanceList():
8335
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8336
                                 instance_name, errors.ECODE_EXISTS)
8337

    
8338
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8339

    
8340
    if self.op.iallocator:
8341
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8342
    else:
8343
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8344
      nodelist = [self.op.pnode]
8345
      if self.op.snode is not None:
8346
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8347
        nodelist.append(self.op.snode)
8348
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8349

    
8350
    # in case of import lock the source node too
8351
    if self.op.mode == constants.INSTANCE_IMPORT:
8352
      src_node = self.op.src_node
8353
      src_path = self.op.src_path
8354

    
8355
      if src_path is None:
8356
        self.op.src_path = src_path = self.op.instance_name
8357

    
8358
      if src_node is None:
8359
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8360
        self.op.src_node = None
8361
        if os.path.isabs(src_path):
8362
          raise errors.OpPrereqError("Importing an instance from a path"
8363
                                     " requires a source node option",
8364
                                     errors.ECODE_INVAL)
8365
      else:
8366
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8367
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8368
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8369
        if not os.path.isabs(src_path):
8370
          self.op.src_path = src_path = \
8371
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8372

    
8373
  def _RunAllocator(self):
8374
    """Run the allocator based on input opcode.
8375

8376
    """
8377
    nics = [n.ToDict() for n in self.nics]
8378
    ial = IAllocator(self.cfg, self.rpc,
8379
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8380
                     name=self.op.instance_name,
8381
                     disk_template=self.op.disk_template,
8382
                     tags=self.op.tags,
8383
                     os=self.op.os_type,
8384
                     vcpus=self.be_full[constants.BE_VCPUS],
8385
                     memory=self.be_full[constants.BE_MEMORY],
8386
                     disks=self.disks,
8387
                     nics=nics,
8388
                     hypervisor=self.op.hypervisor,
8389
                     )
8390

    
8391
    ial.Run(self.op.iallocator)
8392

    
8393
    if not ial.success:
8394
      raise errors.OpPrereqError("Can't compute nodes using"
8395
                                 " iallocator '%s': %s" %
8396
                                 (self.op.iallocator, ial.info),
8397
                                 errors.ECODE_NORES)
8398
    if len(ial.result) != ial.required_nodes:
8399
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8400
                                 " of nodes (%s), required %s" %
8401
                                 (self.op.iallocator, len(ial.result),
8402
                                  ial.required_nodes), errors.ECODE_FAULT)
8403
    self.op.pnode = ial.result[0]
8404
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8405
                 self.op.instance_name, self.op.iallocator,
8406
                 utils.CommaJoin(ial.result))
8407
    if ial.required_nodes == 2:
8408
      self.op.snode = ial.result[1]
8409

    
8410
  def BuildHooksEnv(self):
8411
    """Build hooks env.
8412

8413
    This runs on master, primary and secondary nodes of the instance.
8414

8415
    """
8416
    env = {
8417
      "ADD_MODE": self.op.mode,
8418
      }
8419
    if self.op.mode == constants.INSTANCE_IMPORT:
8420
      env["SRC_NODE"] = self.op.src_node
8421
      env["SRC_PATH"] = self.op.src_path
8422
      env["SRC_IMAGES"] = self.src_images
8423

    
8424
    env.update(_BuildInstanceHookEnv(
8425
      name=self.op.instance_name,
8426
      primary_node=self.op.pnode,
8427
      secondary_nodes=self.secondaries,
8428
      status=self.op.start,
8429
      os_type=self.op.os_type,
8430
      memory=self.be_full[constants.BE_MEMORY],
8431
      vcpus=self.be_full[constants.BE_VCPUS],
8432
      nics=_NICListToTuple(self, self.nics),
8433
      disk_template=self.op.disk_template,
8434
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8435
             for d in self.disks],
8436
      bep=self.be_full,
8437
      hvp=self.hv_full,
8438
      hypervisor_name=self.op.hypervisor,
8439
      tags=self.op.tags,
8440
    ))
8441

    
8442
    return env
8443

    
8444
  def BuildHooksNodes(self):
8445
    """Build hooks nodes.
8446

8447
    """
8448
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8449
    return nl, nl
8450

    
8451
  def _ReadExportInfo(self):
8452
    """Reads the export information from disk.
8453

8454
    It will override the opcode source node and path with the actual
8455
    information, if these two were not specified before.
8456

8457
    @return: the export information
8458

8459
    """
8460
    assert self.op.mode == constants.INSTANCE_IMPORT
8461

    
8462
    src_node = self.op.src_node
8463
    src_path = self.op.src_path
8464

    
8465
    if src_node is None:
8466
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8467
      exp_list = self.rpc.call_export_list(locked_nodes)
8468
      found = False
8469
      for node in exp_list:
8470
        if exp_list[node].fail_msg:
8471
          continue
8472
        if src_path in exp_list[node].payload:
8473
          found = True
8474
          self.op.src_node = src_node = node
8475
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8476
                                                       src_path)
8477
          break
8478
      if not found:
8479
        raise errors.OpPrereqError("No export found for relative path %s" %
8480
                                    src_path, errors.ECODE_INVAL)
8481

    
8482
    _CheckNodeOnline(self, src_node)
8483
    result = self.rpc.call_export_info(src_node, src_path)
8484
    result.Raise("No export or invalid export found in dir %s" % src_path)
8485

    
8486
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8487
    if not export_info.has_section(constants.INISECT_EXP):
8488
      raise errors.ProgrammerError("Corrupted export config",
8489
                                   errors.ECODE_ENVIRON)
8490

    
8491
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8492
    if (int(ei_version) != constants.EXPORT_VERSION):
8493
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8494
                                 (ei_version, constants.EXPORT_VERSION),
8495
                                 errors.ECODE_ENVIRON)
8496
    return export_info
8497

    
8498
  def _ReadExportParams(self, einfo):
8499
    """Use export parameters as defaults.
8500

8501
    In case the opcode doesn't specify (as in override) some instance
8502
    parameters, then try to use them from the export information, if
8503
    that declares them.
8504

8505
    """
8506
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8507

    
8508
    if self.op.disk_template is None:
8509
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8510
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8511
                                          "disk_template")
8512
      else:
8513
        raise errors.OpPrereqError("No disk template specified and the export"
8514
                                   " is missing the disk_template information",
8515
                                   errors.ECODE_INVAL)
8516

    
8517
    if not self.op.disks:
8518
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8519
        disks = []
8520
        # TODO: import the disk iv_name too
8521
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8522
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8523
          disks.append({constants.IDISK_SIZE: disk_sz})
8524
        self.op.disks = disks
8525
      else:
8526
        raise errors.OpPrereqError("No disk info specified and the export"
8527
                                   " is missing the disk information",
8528
                                   errors.ECODE_INVAL)
8529

    
8530
    if (not self.op.nics and
8531
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8532
      nics = []
8533
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8534
        ndict = {}
8535
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8536
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8537
          ndict[name] = v
8538
        nics.append(ndict)
8539
      self.op.nics = nics
8540

    
8541
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8542
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8543

    
8544
    if (self.op.hypervisor is None and
8545
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8546
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8547

    
8548
    if einfo.has_section(constants.INISECT_HYP):
8549
      # use the export parameters but do not override the ones
8550
      # specified by the user
8551
      for name, value in einfo.items(constants.INISECT_HYP):
8552
        if name not in self.op.hvparams:
8553
          self.op.hvparams[name] = value
8554

    
8555
    if einfo.has_section(constants.INISECT_BEP):
8556
      # use the parameters, without overriding
8557
      for name, value in einfo.items(constants.INISECT_BEP):
8558
        if name not in self.op.beparams:
8559
          self.op.beparams[name] = value
8560
    else:
8561
      # try to read the parameters old style, from the main section
8562
      for name in constants.BES_PARAMETERS:
8563
        if (name not in self.op.beparams and
8564
            einfo.has_option(constants.INISECT_INS, name)):
8565
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8566

    
8567
    if einfo.has_section(constants.INISECT_OSP):
8568
      # use the parameters, without overriding
8569
      for name, value in einfo.items(constants.INISECT_OSP):
8570
        if name not in self.op.osparams:
8571
          self.op.osparams[name] = value
8572

    
8573
  def _RevertToDefaults(self, cluster):
8574
    """Revert the instance parameters to the default values.
8575

8576
    """
8577
    # hvparams
8578
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8579
    for name in self.op.hvparams.keys():
8580
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8581
        del self.op.hvparams[name]
8582
    # beparams
8583
    be_defs = cluster.SimpleFillBE({})
8584
    for name in self.op.beparams.keys():
8585
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8586
        del self.op.beparams[name]
8587
    # nic params
8588
    nic_defs = cluster.SimpleFillNIC({})
8589
    for nic in self.op.nics:
8590
      for name in constants.NICS_PARAMETERS:
8591
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8592
          del nic[name]
8593
    # osparams
8594
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8595
    for name in self.op.osparams.keys():
8596
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8597
        del self.op.osparams[name]
8598

    
8599
  def _CalculateFileStorageDir(self):
8600
    """Calculate final instance file storage dir.
8601

8602
    """
8603
    # file storage dir calculation/check
8604
    self.instance_file_storage_dir = None
8605
    if self.op.disk_template in constants.DTS_FILEBASED:
8606
      # build the full file storage dir path
8607
      joinargs = []
8608

    
8609
      if self.op.disk_template == constants.DT_SHARED_FILE:
8610
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8611
      else:
8612
        get_fsd_fn = self.cfg.GetFileStorageDir
8613

    
8614
      cfg_storagedir = get_fsd_fn()
8615
      if not cfg_storagedir:
8616
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8617
      joinargs.append(cfg_storagedir)
8618

    
8619
      if self.op.file_storage_dir is not None:
8620
        joinargs.append(self.op.file_storage_dir)
8621

    
8622
      joinargs.append(self.op.instance_name)
8623

    
8624
      # pylint: disable=W0142
8625
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8626

    
8627
  def CheckPrereq(self):
8628
    """Check prerequisites.
8629

8630
    """
8631
    self._CalculateFileStorageDir()
8632

    
8633
    if self.op.mode == constants.INSTANCE_IMPORT:
8634
      export_info = self._ReadExportInfo()
8635
      self._ReadExportParams(export_info)
8636

    
8637
    if (not self.cfg.GetVGName() and
8638
        self.op.disk_template not in constants.DTS_NOT_LVM):
8639
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8640
                                 " instances", errors.ECODE_STATE)
8641

    
8642
    if self.op.hypervisor is None:
8643
      self.op.hypervisor = self.cfg.GetHypervisorType()
8644

    
8645
    cluster = self.cfg.GetClusterInfo()
8646
    enabled_hvs = cluster.enabled_hypervisors
8647
    if self.op.hypervisor not in enabled_hvs:
8648
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8649
                                 " cluster (%s)" % (self.op.hypervisor,
8650
                                  ",".join(enabled_hvs)),
8651
                                 errors.ECODE_STATE)
8652

    
8653
    # Check tag validity
8654
    for tag in self.op.tags:
8655
      objects.TaggableObject.ValidateTag(tag)
8656

    
8657
    # check hypervisor parameter syntax (locally)
8658
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8659
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8660
                                      self.op.hvparams)
8661
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8662
    hv_type.CheckParameterSyntax(filled_hvp)
8663
    self.hv_full = filled_hvp
8664
    # check that we don't specify global parameters on an instance
8665
    _CheckGlobalHvParams(self.op.hvparams)
8666

    
8667
    # fill and remember the beparams dict
8668
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8669
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8670

    
8671
    # build os parameters
8672
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8673

    
8674
    # now that hvp/bep are in final format, let's reset to defaults,
8675
    # if told to do so
8676
    if self.op.identify_defaults:
8677
      self._RevertToDefaults(cluster)
8678

    
8679
    # NIC buildup
8680
    self.nics = []
8681
    for idx, nic in enumerate(self.op.nics):
8682
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8683
      nic_mode = nic_mode_req
8684
      if nic_mode is None:
8685
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8686

    
8687
      # in routed mode, for the first nic, the default ip is 'auto'
8688
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8689
        default_ip_mode = constants.VALUE_AUTO
8690
      else:
8691
        default_ip_mode = constants.VALUE_NONE
8692

    
8693
      # ip validity checks
8694
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8695
      if ip is None or ip.lower() == constants.VALUE_NONE:
8696
        nic_ip = None
8697
      elif ip.lower() == constants.VALUE_AUTO:
8698
        if not self.op.name_check:
8699
          raise errors.OpPrereqError("IP address set to auto but name checks"
8700
                                     " have been skipped",
8701
                                     errors.ECODE_INVAL)
8702
        nic_ip = self.hostname1.ip
8703
      else:
8704
        if not netutils.IPAddress.IsValid(ip):
8705
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8706
                                     errors.ECODE_INVAL)
8707
        nic_ip = ip
8708

    
8709
      # TODO: check the ip address for uniqueness
8710
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8711
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8712
                                   errors.ECODE_INVAL)
8713

    
8714
      # MAC address verification
8715
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8716
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8717
        mac = utils.NormalizeAndValidateMac(mac)
8718

    
8719
        try:
8720
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8721
        except errors.ReservationError:
8722
          raise errors.OpPrereqError("MAC address %s already in use"
8723
                                     " in cluster" % mac,
8724
                                     errors.ECODE_NOTUNIQUE)
8725

    
8726
      #  Build nic parameters
8727
      link = nic.get(constants.INIC_LINK, None)
8728
      nicparams = {}
8729
      if nic_mode_req:
8730
        nicparams[constants.NIC_MODE] = nic_mode_req
8731
      if link:
8732
        nicparams[constants.NIC_LINK] = link
8733

    
8734
      check_params = cluster.SimpleFillNIC(nicparams)
8735
      objects.NIC.CheckParameterSyntax(check_params)
8736
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8737

    
8738
    # disk checks/pre-build
8739
    default_vg = self.cfg.GetVGName()
8740
    self.disks = []
8741
    for disk in self.op.disks:
8742
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8743
      if mode not in constants.DISK_ACCESS_SET:
8744
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8745
                                   mode, errors.ECODE_INVAL)
8746
      size = disk.get(constants.IDISK_SIZE, None)
8747
      if size is None:
8748
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8749
      try:
8750
        size = int(size)
8751
      except (TypeError, ValueError):
8752
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8753
                                   errors.ECODE_INVAL)
8754

    
8755
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8756
      new_disk = {
8757
        constants.IDISK_SIZE: size,
8758
        constants.IDISK_MODE: mode,
8759
        constants.IDISK_VG: data_vg,
8760
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8761
        }
8762
      if constants.IDISK_ADOPT in disk:
8763
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8764
      self.disks.append(new_disk)
8765

    
8766
    if self.op.mode == constants.INSTANCE_IMPORT:
8767

    
8768
      # Check that the new instance doesn't have less disks than the export
8769
      instance_disks = len(self.disks)
8770
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8771
      if instance_disks < export_disks:
8772
        raise errors.OpPrereqError("Not enough disks to import."
8773
                                   " (instance: %d, export: %d)" %
8774
                                   (instance_disks, export_disks),
8775
                                   errors.ECODE_INVAL)
8776

    
8777
      disk_images = []
8778
      for idx in range(export_disks):
8779
        option = "disk%d_dump" % idx
8780
        if export_info.has_option(constants.INISECT_INS, option):
8781
          # FIXME: are the old os-es, disk sizes, etc. useful?
8782
          export_name = export_info.get(constants.INISECT_INS, option)
8783
          image = utils.PathJoin(self.op.src_path, export_name)
8784
          disk_images.append(image)
8785
        else:
8786
          disk_images.append(False)
8787

    
8788
      self.src_images = disk_images
8789

    
8790
      old_name = export_info.get(constants.INISECT_INS, "name")
8791
      try:
8792
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8793
      except (TypeError, ValueError), err:
8794
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8795
                                   " an integer: %s" % str(err),
8796
                                   errors.ECODE_STATE)
8797
      if self.op.instance_name == old_name:
8798
        for idx, nic in enumerate(self.nics):
8799
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8800
            nic_mac_ini = "nic%d_mac" % idx
8801
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8802

    
8803
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8804

    
8805
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8806
    if self.op.ip_check:
8807
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8808
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8809
                                   (self.check_ip, self.op.instance_name),
8810
                                   errors.ECODE_NOTUNIQUE)
8811

    
8812
    #### mac address generation
8813
    # By generating here the mac address both the allocator and the hooks get
8814
    # the real final mac address rather than the 'auto' or 'generate' value.
8815
    # There is a race condition between the generation and the instance object
8816
    # creation, which means that we know the mac is valid now, but we're not
8817
    # sure it will be when we actually add the instance. If things go bad
8818
    # adding the instance will abort because of a duplicate mac, and the
8819
    # creation job will fail.
8820
    for nic in self.nics:
8821
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8822
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8823

    
8824
    #### allocator run
8825

    
8826
    if self.op.iallocator is not None:
8827
      self._RunAllocator()
8828

    
8829
    # Release all unneeded node locks
8830
    _ReleaseLocks(self, locking.LEVEL_NODE,
8831
                  keep=filter(None, [self.op.pnode, self.op.snode,
8832
                                     self.op.src_node]))
8833

    
8834
    #### node related checks
8835

    
8836
    # check primary node
8837
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8838
    assert self.pnode is not None, \
8839
      "Cannot retrieve locked node %s" % self.op.pnode
8840
    if pnode.offline:
8841
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8842
                                 pnode.name, errors.ECODE_STATE)
8843
    if pnode.drained:
8844
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8845
                                 pnode.name, errors.ECODE_STATE)
8846
    if not pnode.vm_capable:
8847
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8848
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8849

    
8850
    self.secondaries = []
8851

    
8852
    # mirror node verification
8853
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8854
      if self.op.snode == pnode.name:
8855
        raise errors.OpPrereqError("The secondary node cannot be the"
8856
                                   " primary node", errors.ECODE_INVAL)
8857
      _CheckNodeOnline(self, self.op.snode)
8858
      _CheckNodeNotDrained(self, self.op.snode)
8859
      _CheckNodeVmCapable(self, self.op.snode)
8860
      self.secondaries.append(self.op.snode)
8861

    
8862
    nodenames = [pnode.name] + self.secondaries
8863

    
8864
    if not self.adopt_disks:
8865
      # Check lv size requirements, if not adopting
8866
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8867
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8868

    
8869
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8870
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8871
                                disk[constants.IDISK_ADOPT])
8872
                     for disk in self.disks])
8873
      if len(all_lvs) != len(self.disks):
8874
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8875
                                   errors.ECODE_INVAL)
8876
      for lv_name in all_lvs:
8877
        try:
8878
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8879
          # to ReserveLV uses the same syntax
8880
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8881
        except errors.ReservationError:
8882
          raise errors.OpPrereqError("LV named %s used by another instance" %
8883
                                     lv_name, errors.ECODE_NOTUNIQUE)
8884

    
8885
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8886
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8887

    
8888
      node_lvs = self.rpc.call_lv_list([pnode.name],
8889
                                       vg_names.payload.keys())[pnode.name]
8890
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8891
      node_lvs = node_lvs.payload
8892

    
8893
      delta = all_lvs.difference(node_lvs.keys())
8894
      if delta:
8895
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8896
                                   utils.CommaJoin(delta),
8897
                                   errors.ECODE_INVAL)
8898
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8899
      if online_lvs:
8900
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8901
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8902
                                   errors.ECODE_STATE)
8903
      # update the size of disk based on what is found
8904
      for dsk in self.disks:
8905
        dsk[constants.IDISK_SIZE] = \
8906
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8907
                                        dsk[constants.IDISK_ADOPT])][0]))
8908

    
8909
    elif self.op.disk_template == constants.DT_BLOCK:
8910
      # Normalize and de-duplicate device paths
8911
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8912
                       for disk in self.disks])
8913
      if len(all_disks) != len(self.disks):
8914
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8915
                                   errors.ECODE_INVAL)
8916
      baddisks = [d for d in all_disks
8917
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8918
      if baddisks:
8919
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8920
                                   " cannot be adopted" %
8921
                                   (", ".join(baddisks),
8922
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8923
                                   errors.ECODE_INVAL)
8924

    
8925
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8926
                                            list(all_disks))[pnode.name]
8927
      node_disks.Raise("Cannot get block device information from node %s" %
8928
                       pnode.name)
8929
      node_disks = node_disks.payload
8930
      delta = all_disks.difference(node_disks.keys())
8931
      if delta:
8932
        raise errors.OpPrereqError("Missing block device(s): %s" %
8933
                                   utils.CommaJoin(delta),
8934
                                   errors.ECODE_INVAL)
8935
      for dsk in self.disks:
8936
        dsk[constants.IDISK_SIZE] = \
8937
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8938

    
8939
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8940

    
8941
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8942
    # check OS parameters (remotely)
8943
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8944

    
8945
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8946

    
8947
    # memory check on primary node
8948
    if self.op.start:
8949
      _CheckNodeFreeMemory(self, self.pnode.name,
8950
                           "creating instance %s" % self.op.instance_name,
8951
                           self.be_full[constants.BE_MEMORY],
8952
                           self.op.hypervisor)
8953

    
8954
    self.dry_run_result = list(nodenames)
8955

    
8956
  def Exec(self, feedback_fn):
8957
    """Create and add the instance to the cluster.
8958

8959
    """
8960
    instance = self.op.instance_name
8961
    pnode_name = self.pnode.name
8962

    
8963
    ht_kind = self.op.hypervisor
8964
    if ht_kind in constants.HTS_REQ_PORT:
8965
      network_port = self.cfg.AllocatePort()
8966
    else:
8967
      network_port = None
8968

    
8969
    disks = _GenerateDiskTemplate(self,
8970
                                  self.op.disk_template,
8971
                                  instance, pnode_name,
8972
                                  self.secondaries,
8973
                                  self.disks,
8974
                                  self.instance_file_storage_dir,
8975
                                  self.op.file_driver,
8976
                                  0,
8977
                                  feedback_fn)
8978

    
8979
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8980
                            primary_node=pnode_name,
8981
                            nics=self.nics, disks=disks,
8982
                            disk_template=self.op.disk_template,
8983
                            admin_up=False,
8984
                            network_port=network_port,
8985
                            beparams=self.op.beparams,
8986
                            hvparams=self.op.hvparams,
8987
                            hypervisor=self.op.hypervisor,
8988
                            osparams=self.op.osparams,
8989
                            )
8990

    
8991
    if self.op.tags:
8992
      for tag in self.op.tags:
8993
        iobj.AddTag(tag)
8994

    
8995
    if self.adopt_disks:
8996
      if self.op.disk_template == constants.DT_PLAIN:
8997
        # rename LVs to the newly-generated names; we need to construct
8998
        # 'fake' LV disks with the old data, plus the new unique_id
8999
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9000
        rename_to = []
9001
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9002
          rename_to.append(t_dsk.logical_id)
9003
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9004
          self.cfg.SetDiskID(t_dsk, pnode_name)
9005
        result = self.rpc.call_blockdev_rename(pnode_name,
9006
                                               zip(tmp_disks, rename_to))
9007
        result.Raise("Failed to rename adoped LVs")
9008
    else:
9009
      feedback_fn("* creating instance disks...")
9010
      try:
9011
        _CreateDisks(self, iobj)
9012
      except errors.OpExecError:
9013
        self.LogWarning("Device creation failed, reverting...")
9014
        try:
9015
          _RemoveDisks(self, iobj)
9016
        finally:
9017
          self.cfg.ReleaseDRBDMinors(instance)
9018
          raise
9019

    
9020
    feedback_fn("adding instance %s to cluster config" % instance)
9021

    
9022
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9023

    
9024
    # Declare that we don't want to remove the instance lock anymore, as we've
9025
    # added the instance to the config
9026
    del self.remove_locks[locking.LEVEL_INSTANCE]
9027

    
9028
    if self.op.mode == constants.INSTANCE_IMPORT:
9029
      # Release unused nodes
9030
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9031
    else:
9032
      # Release all nodes
9033
      _ReleaseLocks(self, locking.LEVEL_NODE)
9034

    
9035
    disk_abort = False
9036
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9037
      feedback_fn("* wiping instance disks...")
9038
      try:
9039
        _WipeDisks(self, iobj)
9040
      except errors.OpExecError, err:
9041
        logging.exception("Wiping disks failed")
9042
        self.LogWarning("Wiping instance disks failed (%s)", err)
9043
        disk_abort = True
9044

    
9045
    if disk_abort:
9046
      # Something is already wrong with the disks, don't do anything else
9047
      pass
9048
    elif self.op.wait_for_sync:
9049
      disk_abort = not _WaitForSync(self, iobj)
9050
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9051
      # make sure the disks are not degraded (still sync-ing is ok)
9052
      feedback_fn("* checking mirrors status")
9053
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9054
    else:
9055
      disk_abort = False
9056

    
9057
    if disk_abort:
9058
      _RemoveDisks(self, iobj)
9059
      self.cfg.RemoveInstance(iobj.name)
9060
      # Make sure the instance lock gets removed
9061
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9062
      raise errors.OpExecError("There are some degraded disks for"
9063
                               " this instance")
9064

    
9065
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9066
      if self.op.mode == constants.INSTANCE_CREATE:
9067
        if not self.op.no_install:
9068
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9069
                        not self.op.wait_for_sync)
9070
          if pause_sync:
9071
            feedback_fn("* pausing disk sync to install instance OS")
9072
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9073
                                                              iobj.disks, True)
9074
            for idx, success in enumerate(result.payload):
9075
              if not success:
9076
                logging.warn("pause-sync of instance %s for disk %d failed",
9077
                             instance, idx)
9078

    
9079
          feedback_fn("* running the instance OS create scripts...")
9080
          # FIXME: pass debug option from opcode to backend
9081
          os_add_result = \
9082
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
9083
                                          self.op.debug_level)
9084
          if pause_sync:
9085
            feedback_fn("* resuming disk sync")
9086
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9087
                                                              iobj.disks, False)
9088
            for idx, success in enumerate(result.payload):
9089
              if not success:
9090
                logging.warn("resume-sync of instance %s for disk %d failed",
9091
                             instance, idx)
9092

    
9093
          os_add_result.Raise("Could not add os for instance %s"
9094
                              " on node %s" % (instance, pnode_name))
9095

    
9096
      elif self.op.mode == constants.INSTANCE_IMPORT:
9097
        feedback_fn("* running the instance OS import scripts...")
9098

    
9099
        transfers = []
9100

    
9101
        for idx, image in enumerate(self.src_images):
9102
          if not image:
9103
            continue
9104

    
9105
          # FIXME: pass debug option from opcode to backend
9106
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9107
                                             constants.IEIO_FILE, (image, ),
9108
                                             constants.IEIO_SCRIPT,
9109
                                             (iobj.disks[idx], idx),
9110
                                             None)
9111
          transfers.append(dt)
9112

    
9113
        import_result = \
9114
          masterd.instance.TransferInstanceData(self, feedback_fn,
9115
                                                self.op.src_node, pnode_name,
9116
                                                self.pnode.secondary_ip,
9117
                                                iobj, transfers)
9118
        if not compat.all(import_result):
9119
          self.LogWarning("Some disks for instance %s on node %s were not"
9120
                          " imported successfully" % (instance, pnode_name))
9121

    
9122
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9123
        feedback_fn("* preparing remote import...")
9124
        # The source cluster will stop the instance before attempting to make a
9125
        # connection. In some cases stopping an instance can take a long time,
9126
        # hence the shutdown timeout is added to the connection timeout.
9127
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9128
                           self.op.source_shutdown_timeout)
9129
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9130

    
9131
        assert iobj.primary_node == self.pnode.name
9132
        disk_results = \
9133
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9134
                                        self.source_x509_ca,
9135
                                        self._cds, timeouts)
9136
        if not compat.all(disk_results):
9137
          # TODO: Should the instance still be started, even if some disks
9138
          # failed to import (valid for local imports, too)?
9139
          self.LogWarning("Some disks for instance %s on node %s were not"
9140
                          " imported successfully" % (instance, pnode_name))
9141

    
9142
        # Run rename script on newly imported instance
9143
        assert iobj.name == instance
9144
        feedback_fn("Running rename script for %s" % instance)
9145
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9146
                                                   self.source_instance_name,
9147
                                                   self.op.debug_level)
9148
        if result.fail_msg:
9149
          self.LogWarning("Failed to run rename script for %s on node"
9150
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9151

    
9152
      else:
9153
        # also checked in the prereq part
9154
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9155
                                     % self.op.mode)
9156

    
9157
    if self.op.start:
9158
      iobj.admin_up = True
9159
      self.cfg.Update(iobj, feedback_fn)
9160
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9161
      feedback_fn("* starting instance...")
9162
      result = self.rpc.call_instance_start(pnode_name, iobj,
9163
                                            None, None, False)
9164
      result.Raise("Could not start instance")
9165

    
9166
    return list(iobj.all_nodes)
9167

    
9168

    
9169
class LUInstanceConsole(NoHooksLU):
9170
  """Connect to an instance's console.
9171

9172
  This is somewhat special in that it returns the command line that
9173
  you need to run on the master node in order to connect to the
9174
  console.
9175

9176
  """
9177
  REQ_BGL = False
9178

    
9179
  def ExpandNames(self):
9180
    self._ExpandAndLockInstance()
9181

    
9182
  def CheckPrereq(self):
9183
    """Check prerequisites.
9184

9185
    This checks that the instance is in the cluster.
9186

9187
    """
9188
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9189
    assert self.instance is not None, \
9190
      "Cannot retrieve locked instance %s" % self.op.instance_name
9191
    _CheckNodeOnline(self, self.instance.primary_node)
9192

    
9193
  def Exec(self, feedback_fn):
9194
    """Connect to the console of an instance
9195

9196
    """
9197
    instance = self.instance
9198
    node = instance.primary_node
9199

    
9200
    node_insts = self.rpc.call_instance_list([node],
9201
                                             [instance.hypervisor])[node]
9202
    node_insts.Raise("Can't get node information from %s" % node)
9203

    
9204
    if instance.name not in node_insts.payload:
9205
      if instance.admin_up:
9206
        state = constants.INSTST_ERRORDOWN
9207
      else:
9208
        state = constants.INSTST_ADMINDOWN
9209
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9210
                               (instance.name, state))
9211

    
9212
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9213

    
9214
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9215

    
9216

    
9217
def _GetInstanceConsole(cluster, instance):
9218
  """Returns console information for an instance.
9219

9220
  @type cluster: L{objects.Cluster}
9221
  @type instance: L{objects.Instance}
9222
  @rtype: dict
9223

9224
  """
9225
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9226
  # beparams and hvparams are passed separately, to avoid editing the
9227
  # instance and then saving the defaults in the instance itself.
9228
  hvparams = cluster.FillHV(instance)
9229
  beparams = cluster.FillBE(instance)
9230
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9231

    
9232
  assert console.instance == instance.name
9233
  assert console.Validate()
9234

    
9235
  return console.ToDict()
9236

    
9237

    
9238
class LUInstanceReplaceDisks(LogicalUnit):
9239
  """Replace the disks of an instance.
9240

9241
  """
9242
  HPATH = "mirrors-replace"
9243
  HTYPE = constants.HTYPE_INSTANCE
9244
  REQ_BGL = False
9245

    
9246
  def CheckArguments(self):
9247
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9248
                                  self.op.iallocator)
9249

    
9250
  def ExpandNames(self):
9251
    self._ExpandAndLockInstance()
9252

    
9253
    assert locking.LEVEL_NODE not in self.needed_locks
9254
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9255

    
9256
    assert self.op.iallocator is None or self.op.remote_node is None, \
9257
      "Conflicting options"
9258

    
9259
    if self.op.remote_node is not None:
9260
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9261

    
9262
      # Warning: do not remove the locking of the new secondary here
9263
      # unless DRBD8.AddChildren is changed to work in parallel;
9264
      # currently it doesn't since parallel invocations of
9265
      # FindUnusedMinor will conflict
9266
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9267
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9268
    else:
9269
      self.needed_locks[locking.LEVEL_NODE] = []
9270
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9271

    
9272
      if self.op.iallocator is not None:
9273
        # iallocator will select a new node in the same group
9274
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9275

    
9276
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9277
                                   self.op.iallocator, self.op.remote_node,
9278
                                   self.op.disks, False, self.op.early_release)
9279

    
9280
    self.tasklets = [self.replacer]
9281

    
9282
  def DeclareLocks(self, level):
9283
    if level == locking.LEVEL_NODEGROUP:
9284
      assert self.op.remote_node is None
9285
      assert self.op.iallocator is not None
9286
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9287

    
9288
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9289
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9290
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9291

    
9292
    elif level == locking.LEVEL_NODE:
9293
      if self.op.iallocator is not None:
9294
        assert self.op.remote_node is None
9295
        assert not self.needed_locks[locking.LEVEL_NODE]
9296

    
9297
        # Lock member nodes of all locked groups
9298
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9299
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9300
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9301
      else:
9302
        self._LockInstancesNodes()
9303

    
9304
  def BuildHooksEnv(self):
9305
    """Build hooks env.
9306

9307
    This runs on the master, the primary and all the secondaries.
9308

9309
    """
9310
    instance = self.replacer.instance
9311
    env = {
9312
      "MODE": self.op.mode,
9313
      "NEW_SECONDARY": self.op.remote_node,
9314
      "OLD_SECONDARY": instance.secondary_nodes[0],
9315
      }
9316
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9317
    return env
9318

    
9319
  def BuildHooksNodes(self):
9320
    """Build hooks nodes.
9321

9322
    """
9323
    instance = self.replacer.instance
9324
    nl = [
9325
      self.cfg.GetMasterNode(),
9326
      instance.primary_node,
9327
      ]
9328
    if self.op.remote_node is not None:
9329
      nl.append(self.op.remote_node)
9330
    return nl, nl
9331

    
9332
  def CheckPrereq(self):
9333
    """Check prerequisites.
9334

9335
    """
9336
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9337
            self.op.iallocator is None)
9338

    
9339
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9340
    if owned_groups:
9341
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9342

    
9343
    return LogicalUnit.CheckPrereq(self)
9344

    
9345

    
9346
class TLReplaceDisks(Tasklet):
9347
  """Replaces disks for an instance.
9348

9349
  Note: Locking is not within the scope of this class.
9350

9351
  """
9352
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9353
               disks, delay_iallocator, early_release):
9354
    """Initializes this class.
9355

9356
    """
9357
    Tasklet.__init__(self, lu)
9358

    
9359
    # Parameters
9360
    self.instance_name = instance_name
9361
    self.mode = mode
9362
    self.iallocator_name = iallocator_name
9363
    self.remote_node = remote_node
9364
    self.disks = disks
9365
    self.delay_iallocator = delay_iallocator
9366
    self.early_release = early_release
9367

    
9368
    # Runtime data
9369
    self.instance = None
9370
    self.new_node = None
9371
    self.target_node = None
9372
    self.other_node = None
9373
    self.remote_node_info = None
9374
    self.node_secondary_ip = None
9375

    
9376
  @staticmethod
9377
  def CheckArguments(mode, remote_node, iallocator):
9378
    """Helper function for users of this class.
9379

9380
    """
9381
    # check for valid parameter combination
9382
    if mode == constants.REPLACE_DISK_CHG:
9383
      if remote_node is None and iallocator is None:
9384
        raise errors.OpPrereqError("When changing the secondary either an"
9385
                                   " iallocator script must be used or the"
9386
                                   " new node given", errors.ECODE_INVAL)
9387

    
9388
      if remote_node is not None and iallocator is not None:
9389
        raise errors.OpPrereqError("Give either the iallocator or the new"
9390
                                   " secondary, not both", errors.ECODE_INVAL)
9391

    
9392
    elif remote_node is not None or iallocator is not None:
9393
      # Not replacing the secondary
9394
      raise errors.OpPrereqError("The iallocator and new node options can"
9395
                                 " only be used when changing the"
9396
                                 " secondary node", errors.ECODE_INVAL)
9397

    
9398
  @staticmethod
9399
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9400
    """Compute a new secondary node using an IAllocator.
9401

9402
    """
9403
    ial = IAllocator(lu.cfg, lu.rpc,
9404
                     mode=constants.IALLOCATOR_MODE_RELOC,
9405
                     name=instance_name,
9406
                     relocate_from=list(relocate_from))
9407

    
9408
    ial.Run(iallocator_name)
9409

    
9410
    if not ial.success:
9411
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9412
                                 " %s" % (iallocator_name, ial.info),
9413
                                 errors.ECODE_NORES)
9414

    
9415
    if len(ial.result) != ial.required_nodes:
9416
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9417
                                 " of nodes (%s), required %s" %
9418
                                 (iallocator_name,
9419
                                  len(ial.result), ial.required_nodes),
9420
                                 errors.ECODE_FAULT)
9421

    
9422
    remote_node_name = ial.result[0]
9423

    
9424
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9425
               instance_name, remote_node_name)
9426

    
9427
    return remote_node_name
9428

    
9429
  def _FindFaultyDisks(self, node_name):
9430
    """Wrapper for L{_FindFaultyInstanceDisks}.
9431

9432
    """
9433
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9434
                                    node_name, True)
9435

    
9436
  def _CheckDisksActivated(self, instance):
9437
    """Checks if the instance disks are activated.
9438

9439
    @param instance: The instance to check disks
9440
    @return: True if they are activated, False otherwise
9441

9442
    """
9443
    nodes = instance.all_nodes
9444

    
9445
    for idx, dev in enumerate(instance.disks):
9446
      for node in nodes:
9447
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9448
        self.cfg.SetDiskID(dev, node)
9449

    
9450
        result = self.rpc.call_blockdev_find(node, dev)
9451

    
9452
        if result.offline:
9453
          continue
9454
        elif result.fail_msg or not result.payload:
9455
          return False
9456

    
9457
    return True
9458

    
9459
  def CheckPrereq(self):
9460
    """Check prerequisites.
9461

9462
    This checks that the instance is in the cluster.
9463

9464
    """
9465
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9466
    assert instance is not None, \
9467
      "Cannot retrieve locked instance %s" % self.instance_name
9468

    
9469
    if instance.disk_template != constants.DT_DRBD8:
9470
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9471
                                 " instances", errors.ECODE_INVAL)
9472

    
9473
    if len(instance.secondary_nodes) != 1:
9474
      raise errors.OpPrereqError("The instance has a strange layout,"
9475
                                 " expected one secondary but found %d" %
9476
                                 len(instance.secondary_nodes),
9477
                                 errors.ECODE_FAULT)
9478

    
9479
    if not self.delay_iallocator:
9480
      self._CheckPrereq2()
9481

    
9482
  def _CheckPrereq2(self):
9483
    """Check prerequisites, second part.
9484

9485
    This function should always be part of CheckPrereq. It was separated and is
9486
    now called from Exec because during node evacuation iallocator was only
9487
    called with an unmodified cluster model, not taking planned changes into
9488
    account.
9489

9490
    """
9491
    instance = self.instance
9492
    secondary_node = instance.secondary_nodes[0]
9493

    
9494
    if self.iallocator_name is None:
9495
      remote_node = self.remote_node
9496
    else:
9497
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9498
                                       instance.name, instance.secondary_nodes)
9499

    
9500
    if remote_node is None:
9501
      self.remote_node_info = None
9502
    else:
9503
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9504
             "Remote node '%s' is not locked" % remote_node
9505

    
9506
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9507
      assert self.remote_node_info is not None, \
9508
        "Cannot retrieve locked node %s" % remote_node
9509

    
9510
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

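  # Illustrative example (placeholder names, not values from this module):
  # this replace-disks tasklet is driven by an OpInstanceReplaceDisks opcode,
  # built the same way as in LUNodeEvacuate.Exec further down in this file:
  #
  #   opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
  #                                  mode=constants.REPLACE_DISK_CHG,
  #                                  remote_node="node3.example.com",
  #                                  disks=[],
  #                                  early_release=True)
  #
  # With mode=REPLACE_DISK_CHG, new_node is set in CheckPrereq and Exec ends
  # up in _ExecDrbd8Secondary; the PRI/SEC/AUTO modes go through
  # _ExecDrbd8DiskOnly.
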
  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

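  # Illustrative example (placeholder values, assuming the matching opcode
  # class from opcodes.py is named OpRepairNodeStorage): the LU consumes an
  # opcode carrying node_name, storage_type, name and ignore_consistency,
  # e.g. repairing the volume group "xenvg" on one node:
  #
  #   opcodes.OpRepairNodeStorage(node_name="node2.example.com",
  #                               storage_type=constants.ST_LVM_VG,
  #                               name="xenvg",
  #                               ignore_consistency=False)
  #
  # Only storage types whose VALID_STORAGE_OPERATIONS entry includes
  # SO_FIX_CONSISTENCY are accepted by CheckArguments above.

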
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)

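  # For reference, the two execution paths above: with an iallocator the
  # instance moves are delegated to the iallocator plugin and unpacked into
  # jobs by _LoadNodeEvacResult; with an explicit remote_node only DRBD
  # secondaries are moved (NODE_EVAC_SEC), one replace-disks job per
  # instance, each a single-opcode job list inside the returned
  # ResultWithJobs.

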
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]

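# For reference, the node-evacuate/change-group iallocator result consumed
# by _LoadNodeEvacResult above has the shape (values are illustrative):
#
#   moved  = [("inst1.example.com", "group1", ["node2", "node3"]), ...]
#   failed = [("inst9.example.com", "not enough memory"), ...]
#   jobs   = [[<serialized opcode>, ...], ...]   # one inner list per job
#
# Each serialized opcode is rebuilt with opcodes.OpCode.LoadOpCode and gets
# the early_release flag applied via _SetOpEarlyRelease.

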
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

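  # Illustrative example (placeholder values, assuming the matching opcode
  # class from opcodes.py is named OpInstanceGrowDisk): the opcode carries
  # instance_name, disk (index), amount and wait_for_sync, mirroring the
  # self.op attributes used above, e.g. growing disk 0 by 1024 size units:
  #
  #   opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
  #                              disk=0, amount=1024, wait_for_sync=True)

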
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

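  # For reference, _ComputeBlockdevStatus returns either None or the tuple
  #   (dev_path, major, minor, sync_percent, estimated_time,
  #    is_degraded, ldisk_status)
  # and _ComputeDiskStatus wraps it (recursively for children) into the
  # per-disk dictionary built above, which Exec below stores under the
  # "disks" key of each instance's result entry.
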
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
10683
  """Modifies an instances's parameters.
10684

10685
  """
10686
  HPATH = "instance-modify"
10687
  HTYPE = constants.HTYPE_INSTANCE
10688
  REQ_BGL = False
10689

    
10690
  def CheckArguments(self):
10691
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10692
            self.op.hvparams or self.op.beparams or self.op.os_name):
10693
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10694

    
10695
    if self.op.hvparams:
10696
      _CheckGlobalHvParams(self.op.hvparams)
10697

    
10698
    # Disk validation
10699
    disk_addremove = 0
10700
    for disk_op, disk_dict in self.op.disks:
10701
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10702
      if disk_op == constants.DDM_REMOVE:
10703
        disk_addremove += 1
10704
        continue
10705
      elif disk_op == constants.DDM_ADD:
10706
        disk_addremove += 1
10707
      else:
10708
        if not isinstance(disk_op, int):
10709
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10710
        if not isinstance(disk_dict, dict):
10711
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10712
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10713

    
10714
      if disk_op == constants.DDM_ADD:
10715
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10716
        if mode not in constants.DISK_ACCESS_SET:
10717
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10718
                                     errors.ECODE_INVAL)
10719
        size = disk_dict.get(constants.IDISK_SIZE, None)
10720
        if size is None:
10721
          raise errors.OpPrereqError("Required disk parameter size missing",
10722
                                     errors.ECODE_INVAL)
10723
        try:
10724
          size = int(size)
10725
        except (TypeError, ValueError), err:
10726
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10727
                                     str(err), errors.ECODE_INVAL)
10728
        disk_dict[constants.IDISK_SIZE] = size
10729
      else:
10730
        # modification of disk
10731
        if constants.IDISK_SIZE in disk_dict:
10732
          raise errors.OpPrereqError("Disk size change not possible, use"
10733
                                     " grow-disk", errors.ECODE_INVAL)
10734

    
10735
    if disk_addremove > 1:
10736
      raise errors.OpPrereqError("Only one disk add or remove operation"
10737
                                 " supported at a time", errors.ECODE_INVAL)
10738

    
10739
    if self.op.disks and self.op.disk_template is not None:
10740
      raise errors.OpPrereqError("Disk template conversion and other disk"
10741
                                 " changes not supported at the same time",
10742
                                 errors.ECODE_INVAL)
10743

    
10744
    if (self.op.disk_template and
10745
        self.op.disk_template in constants.DTS_INT_MIRROR and
10746
        self.op.remote_node is None):
10747
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10748
                                 " one requires specifying a secondary node",
10749
                                 errors.ECODE_INVAL)
10750

    
10751
    # NIC validation
10752
    nic_addremove = 0
10753
    for nic_op, nic_dict in self.op.nics:
10754
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10755
      if nic_op == constants.DDM_REMOVE:
10756
        nic_addremove += 1
10757
        continue
10758
      elif nic_op == constants.DDM_ADD:
10759
        nic_addremove += 1
10760
      else:
10761
        if not isinstance(nic_op, int):
10762
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10763
        if not isinstance(nic_dict, dict):
10764
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10765
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10766

    
10767
      # nic_dict should be a dict
10768
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10769
      if nic_ip is not None:
10770
        if nic_ip.lower() == constants.VALUE_NONE:
10771
          nic_dict[constants.INIC_IP] = None
10772
        else:
10773
          if not netutils.IPAddress.IsValid(nic_ip):
10774
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10775
                                       errors.ECODE_INVAL)
10776

    
10777
      nic_bridge = nic_dict.get("bridge", None)
10778
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10779
      if nic_bridge and nic_link:
10780
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10781
                                   " at the same time", errors.ECODE_INVAL)
10782
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10783
        nic_dict["bridge"] = None
10784
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10785
        nic_dict[constants.INIC_LINK] = None
10786

    
10787
      if nic_op == constants.DDM_ADD:
10788
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10789
        if nic_mac is None:
10790
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10791

    
10792
      if constants.INIC_MAC in nic_dict:
10793
        nic_mac = nic_dict[constants.INIC_MAC]
10794
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10795
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10796

    
10797
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10798
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10799
                                     " modifying an existing nic",
10800
                                     errors.ECODE_INVAL)
10801

    
10802
    if nic_addremove > 1:
10803
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10804
                                 " supported at a time", errors.ECODE_INVAL)
10805

    
10806
  def ExpandNames(self):
10807
    self._ExpandAndLockInstance()
10808
    self.needed_locks[locking.LEVEL_NODE] = []
10809
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10810

    
10811
  def DeclareLocks(self, level):
10812
    if level == locking.LEVEL_NODE:
10813
      self._LockInstancesNodes()
10814
      if self.op.disk_template and self.op.remote_node:
10815
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10816
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10817

    
10818
  def BuildHooksEnv(self):
10819
    """Build hooks env.
10820

10821
    This runs on the master, primary and secondaries.
10822

10823
    """
10824
    args = dict()
10825
    if constants.BE_MEMORY in self.be_new:
10826
      args["memory"] = self.be_new[constants.BE_MEMORY]
10827
    if constants.BE_VCPUS in self.be_new:
10828
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10829
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10830
    # information at all.
10831
    if self.op.nics:
10832
      args["nics"] = []
10833
      nic_override = dict(self.op.nics)
10834
      for idx, nic in enumerate(self.instance.nics):
10835
        if idx in nic_override:
10836
          this_nic_override = nic_override[idx]
10837
        else:
10838
          this_nic_override = {}
10839
        if constants.INIC_IP in this_nic_override:
10840
          ip = this_nic_override[constants.INIC_IP]
10841
        else:
10842
          ip = nic.ip
10843
        if constants.INIC_MAC in this_nic_override:
10844
          mac = this_nic_override[constants.INIC_MAC]
10845
        else:
10846
          mac = nic.mac
10847
        if idx in self.nic_pnew:
10848
          nicparams = self.nic_pnew[idx]
10849
        else:
10850
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10851
        mode = nicparams[constants.NIC_MODE]
10852
        link = nicparams[constants.NIC_LINK]
10853
        args["nics"].append((ip, mac, mode, link))
10854
      if constants.DDM_ADD in nic_override:
10855
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10856
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10857
        nicparams = self.nic_pnew[constants.DDM_ADD]
10858
        mode = nicparams[constants.NIC_MODE]
10859
        link = nicparams[constants.NIC_LINK]
10860
        args["nics"].append((ip, mac, mode, link))
10861
      elif constants.DDM_REMOVE in nic_override:
10862
        del args["nics"][-1]
10863

    
10864
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10865
    if self.op.disk_template:
10866
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10867

    
10868
    return env
10869

    
10870
  def BuildHooksNodes(self):
10871
    """Build hooks nodes.
10872

10873
    """
10874
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10875
    return (nl, nl)
10876

    
10877
  def CheckPrereq(self):
10878
    """Check prerequisites.
10879

10880
    This checks the requested parameter changes against the instance's
    current configuration and the state of its nodes.
10881

10882
    """
10883
    # checking the new params on the primary/secondary nodes
10884

    
10885
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10886
    cluster = self.cluster = self.cfg.GetClusterInfo()
10887
    assert self.instance is not None, \
10888
      "Cannot retrieve locked instance %s" % self.op.instance_name
10889
    pnode = instance.primary_node
10890
    nodelist = list(instance.all_nodes)
10891

    
10892
    # OS change
10893
    if self.op.os_name and not self.op.force:
10894
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10895
                      self.op.force_variant)
10896
      instance_os = self.op.os_name
10897
    else:
10898
      instance_os = instance.os
10899

    
10900
    if self.op.disk_template:
10901
      if instance.disk_template == self.op.disk_template:
10902
        raise errors.OpPrereqError("Instance already has disk template %s" %
10903
                                   instance.disk_template, errors.ECODE_INVAL)
10904

    
10905
      if (instance.disk_template,
10906
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10907
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10908
                                   " %s to %s" % (instance.disk_template,
10909
                                                  self.op.disk_template),
10910
                                   errors.ECODE_INVAL)
10911
      _CheckInstanceDown(self, instance, "cannot change disk template")
10912
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10913
        if self.op.remote_node == pnode:
10914
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10915
                                     " as the primary node of the instance" %
10916
                                     self.op.remote_node, errors.ECODE_STATE)
10917
        _CheckNodeOnline(self, self.op.remote_node)
10918
        _CheckNodeNotDrained(self, self.op.remote_node)
10919
        # FIXME: here we assume that the old instance type is DT_PLAIN
10920
        assert instance.disk_template == constants.DT_PLAIN
10921
        disks = [{constants.IDISK_SIZE: d.size,
10922
                  constants.IDISK_VG: d.logical_id[0]}
10923
                 for d in instance.disks]
10924
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10925
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
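        # "required" maps each volume group to the space (in MiB) that the
        # new DRBD data and metadata volumes are expected to need on the
        # prospective secondary node (a rough estimate including the DRBD
        # metadata overhead added by _ComputeDiskSizePerVG).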
10926

    
10927
    # hvparams processing
10928
    if self.op.hvparams:
10929
      hv_type = instance.hypervisor
10930
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10931
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10932
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10933

    
10934
      # local check
10935
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10936
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10937
      self.hv_new = hv_new # the new actual values
10938
      self.hv_inst = i_hvdict # the new dict (without defaults)
10939
    else:
10940
      self.hv_new = self.hv_inst = {}
10941

    
10942
    # beparams processing
10943
    if self.op.beparams:
10944
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10945
                                   use_none=True)
10946
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10947
      be_new = cluster.SimpleFillBE(i_bedict)
10948
      self.be_new = be_new # the new actual values
10949
      self.be_inst = i_bedict # the new dict (without defaults)
10950
    else:
10951
      self.be_new = self.be_inst = {}
10952
    be_old = cluster.FillBE(instance)
10953

    
10954
    # osparams processing
10955
    if self.op.osparams:
10956
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10957
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10958
      self.os_inst = i_osdict # the new dict (without defaults)
10959
    else:
10960
      self.os_inst = {}
10961

    
10962
    self.warn = []
10963

    
10964
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10965
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10966
      mem_check_list = [pnode]
10967
      if be_new[constants.BE_AUTO_BALANCE]:
10968
        # auto_balance is enabled (either changed to yes now or set before)
10969
        mem_check_list.extend(instance.secondary_nodes)
10970
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10971
                                                  instance.hypervisor)
10972
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10973
                                         instance.hypervisor)
10974
      pninfo = nodeinfo[pnode]
10975
      msg = pninfo.fail_msg
10976
      if msg:
10977
        # Assume the primary node is unreachable and go ahead
10978
        self.warn.append("Can't get info from primary node %s: %s" %
10979
                         (pnode, msg))
10980
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10981
        self.warn.append("Node data from primary node %s doesn't contain"
10982
                         " free memory information" % pnode)
10983
      elif instance_info.fail_msg:
10984
        self.warn.append("Can't get instance runtime information: %s" %
10985
                        instance_info.fail_msg)
10986
      else:
10987
        if instance_info.payload:
10988
          current_mem = int(instance_info.payload["memory"])
10989
        else:
10990
          # Assume instance not running
10991
          # (there is a slight race condition here, but it's not very probable,
10992
          # and we have no other way to check)
10993
          current_mem = 0
10994
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10995
                    pninfo.payload["memory_free"])
10996
        if miss_mem > 0:
10997
          raise errors.OpPrereqError("This change will prevent the instance"
10998
                                     " from starting, due to %d MB of memory"
10999
                                     " missing on its primary node" % miss_mem,
11000
                                     errors.ECODE_NORES)
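        # Worked example (illustrative numbers): raising memory from 512 to
        # 2048 MiB while the node reports 1024 MiB free and the instance
        # currently uses 512 MiB gives
        # miss_mem = 2048 - 512 - 1024 = 512 > 0, so the change is refused.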
11001

    
11002
      if be_new[constants.BE_AUTO_BALANCE]:
11003
        for node, nres in nodeinfo.items():
11004
          if node not in instance.secondary_nodes:
11005
            continue
11006
          nres.Raise("Can't get info from secondary node %s" % node,
11007
                     prereq=True, ecode=errors.ECODE_STATE)
11008
          if not isinstance(nres.payload.get("memory_free", None), int):
11009
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11010
                                       " memory information" % node,
11011
                                       errors.ECODE_STATE)
11012
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11013
            raise errors.OpPrereqError("This change will prevent the instance"
11014
                                       " from failover to its secondary node"
11015
                                       " %s, due to not enough memory" % node,
11016
                                       errors.ECODE_STATE)
11017

    
11018
    # NIC processing
11019
    self.nic_pnew = {}
11020
    self.nic_pinst = {}
11021
    for nic_op, nic_dict in self.op.nics:
11022
      if nic_op == constants.DDM_REMOVE:
11023
        if not instance.nics:
11024
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11025
                                     errors.ECODE_INVAL)
11026
        continue
11027
      if nic_op != constants.DDM_ADD:
11028
        # an existing nic
11029
        if not instance.nics:
11030
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11031
                                     " no NICs" % nic_op,
11032
                                     errors.ECODE_INVAL)
11033
        if nic_op < 0 or nic_op >= len(instance.nics):
11034
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11035
                                     " are 0 to %d" %
11036
                                     (nic_op, len(instance.nics) - 1),
11037
                                     errors.ECODE_INVAL)
11038
        old_nic_params = instance.nics[nic_op].nicparams
11039
        old_nic_ip = instance.nics[nic_op].ip
11040
      else:
11041
        old_nic_params = {}
11042
        old_nic_ip = None
11043

    
11044
      update_params_dict = dict([(key, nic_dict[key])
11045
                                 for key in constants.NICS_PARAMETERS
11046
                                 if key in nic_dict])
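      # keep only keys that are real NIC parameters (mode/link); ip and mac
      # are per-NIC instance attributes and are handled separately below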
11047

    
11048
      if "bridge" in nic_dict:
11049
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11050

    
11051
      new_nic_params = _GetUpdatedParams(old_nic_params,
11052
                                         update_params_dict)
11053
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11054
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11055
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11056
      self.nic_pinst[nic_op] = new_nic_params
11057
      self.nic_pnew[nic_op] = new_filled_nic_params
11058
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11059

    
11060
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11061
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11062
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11063
        if msg:
11064
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11065
          if self.op.force:
11066
            self.warn.append(msg)
11067
          else:
11068
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11069
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11070
        if constants.INIC_IP in nic_dict:
11071
          nic_ip = nic_dict[constants.INIC_IP]
11072
        else:
11073
          nic_ip = old_nic_ip
11074
        if nic_ip is None:
11075
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11076
                                     " on a routed nic", errors.ECODE_INVAL)
11077
      if constants.INIC_MAC in nic_dict:
11078
        nic_mac = nic_dict[constants.INIC_MAC]
11079
        if nic_mac is None:
11080
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11081
                                     errors.ECODE_INVAL)
11082
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11083
          # otherwise generate the mac
11084
          nic_dict[constants.INIC_MAC] = \
11085
            self.cfg.GenerateMAC(self.proc.GetECId())
11086
        else:
11087
          # or validate/reserve the current one
11088
          try:
11089
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11090
          except errors.ReservationError:
11091
            raise errors.OpPrereqError("MAC address %s already in use"
11092
                                       " in cluster" % nic_mac,
11093
                                       errors.ECODE_NOTUNIQUE)
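      # Reserving the MAC against this job's execution context ID should keep
      # a concurrent job from claiming the same address before this change is
      # committed to the configuration.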
11094

    
11095
    # DISK processing
11096
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11097
      raise errors.OpPrereqError("Disk operations not supported for"
11098
                                 " diskless instances",
11099
                                 errors.ECODE_INVAL)
11100
    for disk_op, _ in self.op.disks:
11101
      if disk_op == constants.DDM_REMOVE:
11102
        if len(instance.disks) == 1:
11103
          raise errors.OpPrereqError("Cannot remove the last disk of"
11104
                                     " an instance", errors.ECODE_INVAL)
11105
        _CheckInstanceDown(self, instance, "cannot remove disks")
11106

    
11107
      if (disk_op == constants.DDM_ADD and
11108
          len(instance.disks) >= constants.MAX_DISKS):
11109
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11110
                                   " add more" % constants.MAX_DISKS,
11111
                                   errors.ECODE_STATE)
11112
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11113
        # an existing disk
11114
        if disk_op < 0 or disk_op >= len(instance.disks):
11115
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11116
                                     " are 0 to %d" %
11117
                                     (disk_op, len(instance.disks) - 1),
11118
                                     errors.ECODE_INVAL)
11119

    
11120
    return
11121

    
11122
  def _ConvertPlainToDrbd(self, feedback_fn):
11123
    """Converts an instance from plain to drbd.
11124

11125
    """
11126
    feedback_fn("Converting template to drbd")
11127
    instance = self.instance
11128
    pnode = instance.primary_node
11129
    snode = self.op.remote_node
11130

    
11131
    # create a fake disk info for _GenerateDiskTemplate
11132
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11133
                  constants.IDISK_VG: d.logical_id[0]}
11134
                 for d in instance.disks]
11135
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11136
                                      instance.name, pnode, [snode],
11137
                                      disk_info, None, None, 0, feedback_fn)
11138
    info = _GetInstanceInfoText(instance)
11139
    feedback_fn("Creating aditional volumes...")
11140
    # first, create the missing data and meta devices
11141
    for disk in new_disks:
11142
      # unfortunately this is... not too nice
11143
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11144
                            info, True)
11145
      for child in disk.children:
11146
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11147
    # at this stage, all new LVs have been created, we can rename the
11148
    # old ones
11149
    feedback_fn("Renaming original volumes...")
11150
    rename_list = [(o, n.children[0].logical_id)
11151
                   for (o, n) in zip(instance.disks, new_disks)]
11152
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11153
    result.Raise("Failed to rename original LVs")
11154

    
11155
    feedback_fn("Initializing DRBD devices...")
11156
    # all child devices are in place, we can now create the DRBD devices
11157
    for disk in new_disks:
11158
      for node in [pnode, snode]:
11159
        f_create = node == pnode
11160
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11161

    
11162
    # at this point, the instance has been modified
11163
    instance.disk_template = constants.DT_DRBD8
11164
    instance.disks = new_disks
11165
    self.cfg.Update(instance, feedback_fn)
11166

    
11167
    # disks are created, waiting for sync
11168
    disk_abort = not _WaitForSync(self, instance,
11169
                                  oneshot=not self.op.wait_for_sync)
11170
    if disk_abort:
11171
      raise errors.OpExecError("There are some degraded disks for"
11172
                               " this instance, please cleanup manually")
11173

    
11174
  def _ConvertDrbdToPlain(self, feedback_fn):
11175
    """Converts an instance from drbd to plain.
11176

11177
    """
11178
    instance = self.instance
11179
    assert len(instance.secondary_nodes) == 1
11180
    pnode = instance.primary_node
11181
    snode = instance.secondary_nodes[0]
11182
    feedback_fn("Converting template to plain")
11183

    
11184
    old_disks = instance.disks
11185
    new_disks = [d.children[0] for d in old_disks]
11186

    
11187
    # copy over size and mode
11188
    for parent, child in zip(old_disks, new_disks):
11189
      child.size = parent.size
11190
      child.mode = parent.mode
11191

    
11192
    # update instance structure
11193
    instance.disks = new_disks
11194
    instance.disk_template = constants.DT_PLAIN
11195
    self.cfg.Update(instance, feedback_fn)
11196

    
11197
    feedback_fn("Removing volumes on the secondary node...")
11198
    for disk in old_disks:
11199
      self.cfg.SetDiskID(disk, snode)
11200
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11201
      if msg:
11202
        self.LogWarning("Could not remove block device %s on node %s,"
11203
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11204

    
11205
    feedback_fn("Removing unneeded volumes on the primary node...")
11206
    for idx, disk in enumerate(old_disks):
11207
      meta = disk.children[1]
11208
      self.cfg.SetDiskID(meta, pnode)
11209
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11210
      if msg:
11211
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11212
                        " continuing anyway: %s", idx, pnode, msg)
11213

    
11214
    # this is a DRBD disk, return its port to the pool
11215
    for disk in old_disks:
11216
      tcp_port = disk.logical_id[2]
11217
      self.cfg.AddTcpUdpPort(tcp_port)
11218

    
11219
  def Exec(self, feedback_fn):
11220
    """Modifies an instance.
11221

11222
    All parameters take effect only at the next restart of the instance.
11223

11224
    """
11225
    # Process here the warnings from CheckPrereq, as we don't have a
11226
    # feedback_fn there.
11227
    for warn in self.warn:
11228
      feedback_fn("WARNING: %s" % warn)
11229

    
11230
    result = []
11231
    instance = self.instance
11232
    # disk changes
11233
    for disk_op, disk_dict in self.op.disks:
11234
      if disk_op == constants.DDM_REMOVE:
11235
        # remove the last disk
11236
        device = instance.disks.pop()
11237
        device_idx = len(instance.disks)
11238
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11239
          self.cfg.SetDiskID(disk, node)
11240
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11241
          if msg:
11242
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11243
                            " continuing anyway", device_idx, node, msg)
11244
        result.append(("disk/%d" % device_idx, "remove"))
11245

    
11246
        # if this is a DRBD disk, return its port to the pool
11247
        if device.dev_type in constants.LDS_DRBD:
11248
          tcp_port = device.logical_id[2]
11249
          self.cfg.AddTcpUdpPort(tcp_port)
11250
      elif disk_op == constants.DDM_ADD:
11251
        # add a new disk
11252
        if instance.disk_template in (constants.DT_FILE,
11253
                                        constants.DT_SHARED_FILE):
11254
          file_driver, file_path = instance.disks[0].logical_id
11255
          file_path = os.path.dirname(file_path)
11256
        else:
11257
          file_driver = file_path = None
11258
        disk_idx_base = len(instance.disks)
11259
        new_disk = _GenerateDiskTemplate(self,
11260
                                         instance.disk_template,
11261
                                         instance.name, instance.primary_node,
11262
                                         instance.secondary_nodes,
11263
                                         [disk_dict],
11264
                                         file_path,
11265
                                         file_driver,
11266
                                         disk_idx_base, feedback_fn)[0]
11267
        instance.disks.append(new_disk)
11268
        info = _GetInstanceInfoText(instance)
11269

    
11270
        logging.info("Creating volume %s for instance %s",
11271
                     new_disk.iv_name, instance.name)
11272
        # Note: this needs to be kept in sync with _CreateDisks
11273
        #HARDCODE
11274
        for node in instance.all_nodes:
11275
          f_create = node == instance.primary_node
11276
          try:
11277
            _CreateBlockDev(self, node, instance, new_disk,
11278
                            f_create, info, f_create)
11279
          except errors.OpExecError, err:
11280
            self.LogWarning("Failed to create volume %s (%s) on"
11281
                            " node %s: %s",
11282
                            new_disk.iv_name, new_disk, node, err)
11283
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11284
                       (new_disk.size, new_disk.mode)))
11285
      else:
11286
        # change a given disk
11287
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11288
        result.append(("disk.mode/%d" % disk_op,
11289
                       disk_dict[constants.IDISK_MODE]))
11290

    
11291
    if self.op.disk_template:
11292
      r_shut = _ShutdownInstanceDisks(self, instance)
11293
      if not r_shut:
11294
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11295
                                 " proceed with disk template conversion")
11296
      mode = (instance.disk_template, self.op.disk_template)
11297
      try:
11298
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11299
      except:
11300
        self.cfg.ReleaseDRBDMinors(instance.name)
11301
        raise
11302
      result.append(("disk_template", self.op.disk_template))
11303

    
11304
    # NIC changes
11305
    for nic_op, nic_dict in self.op.nics:
11306
      if nic_op == constants.DDM_REMOVE:
11307
        # remove the last nic
11308
        del instance.nics[-1]
11309
        result.append(("nic.%d" % len(instance.nics), "remove"))
11310
      elif nic_op == constants.DDM_ADD:
11311
        # mac and bridge should be set by now
11312
        mac = nic_dict[constants.INIC_MAC]
11313
        ip = nic_dict.get(constants.INIC_IP, None)
11314
        nicparams = self.nic_pinst[constants.DDM_ADD]
11315
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11316
        instance.nics.append(new_nic)
11317
        result.append(("nic.%d" % (len(instance.nics) - 1),
11318
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11319
                       (new_nic.mac, new_nic.ip,
11320
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11321
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11322
                       )))
11323
      else:
11324
        for key in (constants.INIC_MAC, constants.INIC_IP):
11325
          if key in nic_dict:
11326
            setattr(instance.nics[nic_op], key, nic_dict[key])
11327
        if nic_op in self.nic_pinst:
11328
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11329
        for key, val in nic_dict.iteritems():
11330
          result.append(("nic.%s/%d" % (key, nic_op), val))
11331

    
11332
    # hvparams changes
11333
    if self.op.hvparams:
11334
      instance.hvparams = self.hv_inst
11335
      for key, val in self.op.hvparams.iteritems():
11336
        result.append(("hv/%s" % key, val))
11337

    
11338
    # beparams changes
11339
    if self.op.beparams:
11340
      instance.beparams = self.be_inst
11341
      for key, val in self.op.beparams.iteritems():
11342
        result.append(("be/%s" % key, val))
11343

    
11344
    # OS change
11345
    if self.op.os_name:
11346
      instance.os = self.op.os_name
11347

    
11348
    # osparams changes
11349
    if self.op.osparams:
11350
      instance.osparams = self.os_inst
11351
      for key, val in self.op.osparams.iteritems():
11352
        result.append(("os/%s" % key, val))
11353

    
11354
    self.cfg.Update(instance, feedback_fn)
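    # "result" is a list of (parameter, new value) pairs, for example
    # [("disk/1", "add:size=1024,mode=rw"), ("be/memory", 512)] (values
    # illustrative); the caller presents it as the summary of applied changes.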
11355

    
11356
    return result
11357

    
11358
  _DISK_CONVERSIONS = {
11359
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11360
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11361
    }
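  # Exec() dispatches template conversions through this table, roughly:
  #   self._DISK_CONVERSIONS[(old_template, new_template)](self, feedback_fn)
  # so supporting another conversion only means registering a handler here.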
11362

    
11363

    
11364
class LUInstanceChangeGroup(LogicalUnit):
11365
  HPATH = "instance-change-group"
11366
  HTYPE = constants.HTYPE_INSTANCE
11367
  REQ_BGL = False
11368

    
11369
  def ExpandNames(self):
11370
    self.share_locks = _ShareAll()
11371
    self.needed_locks = {
11372
      locking.LEVEL_NODEGROUP: [],
11373
      locking.LEVEL_NODE: [],
11374
      }
11375

    
11376
    self._ExpandAndLockInstance()
11377

    
11378
    if self.op.target_groups:
11379
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11380
                                  self.op.target_groups)
11381
    else:
11382
      self.req_target_uuids = None
11383

    
11384
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11385

    
11386
  def DeclareLocks(self, level):
11387
    if level == locking.LEVEL_NODEGROUP:
11388
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11389

    
11390
      if self.req_target_uuids:
11391
        lock_groups = set(self.req_target_uuids)
11392

    
11393
        # Lock all groups used by instance optimistically; this requires going
11394
        # via the node before it's locked, requiring verification later on
11395
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11396
        lock_groups.update(instance_groups)
11397
      else:
11398
        # No target groups, need to lock all of them
11399
        lock_groups = locking.ALL_SET
11400

    
11401
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11402

    
11403
    elif level == locking.LEVEL_NODE:
11404
      if self.req_target_uuids:
11405
        # Lock all nodes used by instances
11406
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11407
        self._LockInstancesNodes()
11408

    
11409
        # Lock all nodes in all potential target groups
11410
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11411
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11412
        member_nodes = [node_name
11413
                        for group in lock_groups
11414
                        for node_name in self.cfg.GetNodeGroup(group).members]
11415
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11416
      else:
11417
        # Lock all nodes as all groups are potential targets
11418
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11419

    
11420
  def CheckPrereq(self):
11421
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11422
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11423
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11424

    
11425
    assert (self.req_target_uuids is None or
11426
            owned_groups.issuperset(self.req_target_uuids))
11427
    assert owned_instances == set([self.op.instance_name])
11428

    
11429
    # Get instance information
11430
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11431

    
11432
    # Check if node groups for locked instance are still correct
11433
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11434
      ("Instance %s's nodes changed while we kept the lock" %
11435
       self.op.instance_name)
11436

    
11437
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11438
                                           owned_groups)
11439

    
11440
    if self.req_target_uuids:
11441
      # User requested specific target groups
11442
      self.target_uuids = self.req_target_uuids
11443
    else:
11444
      # All groups except those used by the instance are potential targets
11445
      self.target_uuids = owned_groups - inst_groups
11446

    
11447
    conflicting_groups = self.target_uuids & inst_groups
11448
    if conflicting_groups:
11449
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11450
                                 " used by the instance '%s'" %
11451
                                 (utils.CommaJoin(conflicting_groups),
11452
                                  self.op.instance_name),
11453
                                 errors.ECODE_INVAL)
11454

    
11455
    if not self.target_uuids:
11456
      raise errors.OpPrereqError("There are no possible target groups",
11457
                                 errors.ECODE_INVAL)
11458

    
11459
  def BuildHooksEnv(self):
11460
    """Build hooks env.
11461

11462
    """
11463
    assert self.target_uuids
11464

    
11465
    env = {
11466
      "TARGET_GROUPS": " ".join(self.target_uuids),
11467
      }
11468

    
11469
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11470

    
11471
    return env
11472

    
11473
  def BuildHooksNodes(self):
11474
    """Build hooks nodes.
11475

11476
    """
11477
    mn = self.cfg.GetMasterNode()
11478
    return ([mn], [mn])
11479

    
11480
  def Exec(self, feedback_fn):
11481
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11482

    
11483
    assert instances == [self.op.instance_name], "Instance not locked"
11484

    
11485
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11486
                     instances=instances, target_groups=list(self.target_uuids))
11487

    
11488
    ial.Run(self.op.iallocator)
11489

    
11490
    if not ial.success:
11491
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11492
                                 " instance '%s' using iallocator '%s': %s" %
11493
                                 (self.op.instance_name, self.op.iallocator,
11494
                                  ial.info),
11495
                                 errors.ECODE_NORES)
11496

    
11497
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11498

    
11499
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11500
                 " instance '%s'", len(jobs), self.op.instance_name)
11501

    
11502
    return ResultWithJobs(jobs)
11503

    
11504

    
11505
class LUBackupQuery(NoHooksLU):
11506
  """Query the exports list
11507

11508
  """
11509
  REQ_BGL = False
11510

    
11511
  def ExpandNames(self):
11512
    self.needed_locks = {}
11513
    self.share_locks[locking.LEVEL_NODE] = 1
11514
    if not self.op.nodes:
11515
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11516
    else:
11517
      self.needed_locks[locking.LEVEL_NODE] = \
11518
        _GetWantedNodes(self, self.op.nodes)
11519

    
11520
  def Exec(self, feedback_fn):
11521
    """Compute the list of all the exported system images.
11522

11523
    @rtype: dict
11524
    @return: a dictionary with the structure node->(export-list)
11525
        where export-list is a list of the instances exported on
11526
        that node.
11527

11528
    """
11529
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11530
    rpcresult = self.rpc.call_export_list(self.nodes)
11531
    result = {}
11532
    for node in rpcresult:
11533
      if rpcresult[node].fail_msg:
11534
        result[node] = False
11535
      else:
11536
        result[node] = rpcresult[node].payload
11537

    
11538
    return result
11539

    
11540

    
11541
class LUBackupPrepare(NoHooksLU):
11542
  """Prepares an instance for an export and returns useful information.
11543

11544
  """
11545
  REQ_BGL = False
11546

    
11547
  def ExpandNames(self):
11548
    self._ExpandAndLockInstance()
11549

    
11550
  def CheckPrereq(self):
11551
    """Check prerequisites.
11552

11553
    """
11554
    instance_name = self.op.instance_name
11555

    
11556
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11557
    assert self.instance is not None, \
11558
          "Cannot retrieve locked instance %s" % self.op.instance_name
11559
    _CheckNodeOnline(self, self.instance.primary_node)
11560

    
11561
    self._cds = _GetClusterDomainSecret()
11562

    
11563
  def Exec(self, feedback_fn):
11564
    """Prepares an instance for an export.
11565

11566
    """
11567
    instance = self.instance
11568

    
11569
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11570
      salt = utils.GenerateSecret(8)
11571

    
11572
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11573
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11574
                                              constants.RIE_CERT_VALIDITY)
11575
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11576

    
11577
      (name, cert_pem) = result.payload
11578

    
11579
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11580
                                             cert_pem)
11581

    
11582
      return {
11583
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11584
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11585
                          salt),
11586
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11587
        }
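      # The salted HMAC over the key name and the signed CA are meant to let
      # the receiving side verify, via the shared cluster domain secret, that
      # this export preparation really originates from this cluster.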
11588

    
11589
    return None
11590

    
11591

    
11592
class LUBackupExport(LogicalUnit):
11593
  """Export an instance to an image in the cluster.
11594

11595
  """
11596
  HPATH = "instance-export"
11597
  HTYPE = constants.HTYPE_INSTANCE
11598
  REQ_BGL = False
11599

    
11600
  def CheckArguments(self):
11601
    """Check the arguments.
11602

11603
    """
11604
    self.x509_key_name = self.op.x509_key_name
11605
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11606

    
11607
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11608
      if not self.x509_key_name:
11609
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11610
                                   errors.ECODE_INVAL)
11611

    
11612
      if not self.dest_x509_ca_pem:
11613
        raise errors.OpPrereqError("Missing destination X509 CA",
11614
                                   errors.ECODE_INVAL)
11615

    
11616
  def ExpandNames(self):
11617
    self._ExpandAndLockInstance()
11618

    
11619
    # Lock all nodes for local exports
11620
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11621
      # FIXME: lock only instance primary and destination node
11622
      #
11623
      # Sad but true, for now we have to lock all nodes, as we don't know where
11624
      # the previous export might be, and in this LU we search for it and
11625
      # remove it from its current node. In the future we could fix this by:
11626
      #  - making a tasklet to search (share-lock all), then create the
11627
      #    new one, then one to remove, after
11628
      #  - removing the removal operation altogether
11629
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11630

    
11631
  def DeclareLocks(self, level):
11632
    """Last minute lock declaration."""
11633
    # All nodes are locked anyway, so nothing to do here.
11634

    
11635
  def BuildHooksEnv(self):
11636
    """Build hooks env.
11637

11638
    This will run on the master, primary node and target node.
11639

11640
    """
11641
    env = {
11642
      "EXPORT_MODE": self.op.mode,
11643
      "EXPORT_NODE": self.op.target_node,
11644
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11645
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11646
      # TODO: Generic function for boolean env variables
11647
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11648
      }
11649

    
11650
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11651

    
11652
    return env
11653

    
11654
  def BuildHooksNodes(self):
11655
    """Build hooks nodes.
11656

11657
    """
11658
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11659

    
11660
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11661
      nl.append(self.op.target_node)
11662

    
11663
    return (nl, nl)
11664

    
11665
  def CheckPrereq(self):
11666
    """Check prerequisites.
11667

11668
    This checks that the instance and node names are valid.
11669

11670
    """
11671
    instance_name = self.op.instance_name
11672

    
11673
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11674
    assert self.instance is not None, \
11675
          "Cannot retrieve locked instance %s" % self.op.instance_name
11676
    _CheckNodeOnline(self, self.instance.primary_node)
11677

    
11678
    if (self.op.remove_instance and self.instance.admin_up and
11679
        not self.op.shutdown):
11680
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11681
                                 " down before")
11682

    
11683
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11684
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11685
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11686
      assert self.dst_node is not None
11687

    
11688
      _CheckNodeOnline(self, self.dst_node.name)
11689
      _CheckNodeNotDrained(self, self.dst_node.name)
11690

    
11691
      self._cds = None
11692
      self.dest_disk_info = None
11693
      self.dest_x509_ca = None
11694

    
11695
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11696
      self.dst_node = None
11697

    
11698
      if len(self.op.target_node) != len(self.instance.disks):
11699
        raise errors.OpPrereqError(("Received destination information for %s"
11700
                                    " disks, but instance %s has %s disks") %
11701
                                   (len(self.op.target_node), instance_name,
11702
                                    len(self.instance.disks)),
11703
                                   errors.ECODE_INVAL)
11704

    
11705
      cds = _GetClusterDomainSecret()
11706

    
11707
      # Check X509 key name
11708
      try:
11709
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11710
      except (TypeError, ValueError), err:
11711
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11712

    
11713
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11714
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11715
                                   errors.ECODE_INVAL)
11716

    
11717
      # Load and verify CA
11718
      try:
11719
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11720
      except OpenSSL.crypto.Error, err:
11721
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11722
                                   (err, ), errors.ECODE_INVAL)
11723

    
11724
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11725
      if errcode is not None:
11726
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11727
                                   (msg, ), errors.ECODE_INVAL)
11728

    
11729
      self.dest_x509_ca = cert
11730

    
11731
      # Verify target information
11732
      disk_info = []
11733
      for idx, disk_data in enumerate(self.op.target_node):
11734
        try:
11735
          (host, port, magic) = \
11736
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11737
        except errors.GenericError, err:
11738
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11739
                                     (idx, err), errors.ECODE_INVAL)
11740

    
11741
        disk_info.append((host, port, magic))
11742

    
11743
      assert len(disk_info) == len(self.op.target_node)
11744
      self.dest_disk_info = disk_info
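      # Each (host, port, magic) tuple says where the corresponding disk has
      # to be sent and which magic value the receiving import is expected to
      # present.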
11745

    
11746
    else:
11747
      raise errors.ProgrammerError("Unhandled export mode %r" %
11748
                                   self.op.mode)
11749

    
11750
    # instance disk type verification
11751
    # TODO: Implement export support for file-based disks
11752
    for disk in self.instance.disks:
11753
      if disk.dev_type == constants.LD_FILE:
11754
        raise errors.OpPrereqError("Export not supported for instances with"
11755
                                   " file-based disks", errors.ECODE_INVAL)
11756

    
11757
  def _CleanupExports(self, feedback_fn):
11758
    """Removes exports of current instance from all other nodes.
11759

11760
    If an instance in a cluster with nodes A..D was exported to node C, its
11761
    exports will be removed from the nodes A, B and D.
11762

11763
    """
11764
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11765

    
11766
    nodelist = self.cfg.GetNodeList()
11767
    nodelist.remove(self.dst_node.name)
11768

    
11769
    # on one-node clusters nodelist will be empty after the removal
11770
    # if we proceed the backup would be removed because OpBackupQuery
11771
    # substitutes an empty list with the full cluster node list.
11772
    iname = self.instance.name
11773
    if nodelist:
11774
      feedback_fn("Removing old exports for instance %s" % iname)
11775
      exportlist = self.rpc.call_export_list(nodelist)
11776
      for node in exportlist:
11777
        if exportlist[node].fail_msg:
11778
          continue
11779
        if iname in exportlist[node].payload:
11780
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11781
          if msg:
11782
            self.LogWarning("Could not remove older export for instance %s"
11783
                            " on node %s: %s", iname, node, msg)
11784

    
11785
  def Exec(self, feedback_fn):
11786
    """Export an instance to an image in the cluster.
11787

11788
    """
11789
    assert self.op.mode in constants.EXPORT_MODES
11790

    
11791
    instance = self.instance
11792
    src_node = instance.primary_node
11793

    
11794
    if self.op.shutdown:
11795
      # shutdown the instance, but not the disks
11796
      feedback_fn("Shutting down instance %s" % instance.name)
11797
      result = self.rpc.call_instance_shutdown(src_node, instance,
11798
                                               self.op.shutdown_timeout)
11799
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11800
      result.Raise("Could not shutdown instance %s on"
11801
                   " node %s" % (instance.name, src_node))
11802

    
11803
    # set the disks ID correctly since call_instance_start needs the
11804
    # correct drbd minor to create the symlinks
11805
    for disk in instance.disks:
11806
      self.cfg.SetDiskID(disk, src_node)
11807

    
11808
    activate_disks = (not instance.admin_up)
11809

    
11810
    if activate_disks:
11811
      # Activate the instance disks if we're exporting a stopped instance
11812
      feedback_fn("Activating disks for %s" % instance.name)
11813
      _StartInstanceDisks(self, instance, None)
11814

    
11815
    try:
11816
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11817
                                                     instance)
11818

    
11819
      helper.CreateSnapshots()
11820
      try:
11821
        if (self.op.shutdown and instance.admin_up and
11822
            not self.op.remove_instance):
11823
          assert not activate_disks
11824
          feedback_fn("Starting instance %s" % instance.name)
11825
          result = self.rpc.call_instance_start(src_node, instance,
11826
                                                None, None, False)
11827
          msg = result.fail_msg
11828
          if msg:
11829
            feedback_fn("Failed to start instance: %s" % msg)
11830
            _ShutdownInstanceDisks(self, instance)
11831
            raise errors.OpExecError("Could not start instance: %s" % msg)
11832

    
11833
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11834
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11835
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11836
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11837
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11838

    
11839
          (key_name, _, _) = self.x509_key_name
11840

    
11841
          dest_ca_pem = \
11842
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11843
                                            self.dest_x509_ca)
11844

    
11845
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11846
                                                     key_name, dest_ca_pem,
11847
                                                     timeouts)
11848
      finally:
11849
        helper.Cleanup()
11850

    
11851
      # Check for backwards compatibility
11852
      assert len(dresults) == len(instance.disks)
11853
      assert compat.all(isinstance(i, bool) for i in dresults), \
11854
             "Not all results are boolean: %r" % dresults
11855

    
11856
    finally:
11857
      if activate_disks:
11858
        feedback_fn("Deactivating disks for %s" % instance.name)
11859
        _ShutdownInstanceDisks(self, instance)
11860

    
11861
    if not (compat.all(dresults) and fin_resu):
11862
      failures = []
11863
      if not fin_resu:
11864
        failures.append("export finalization")
11865
      if not compat.all(dresults):
11866
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11867
                               if not dsk)
11868
        failures.append("disk export: disk(s) %s" % fdsk)
11869

    
11870
      raise errors.OpExecError("Export failed, errors in %s" %
11871
                               utils.CommaJoin(failures))
11872

    
11873
    # At this point, the export was successful, we can cleanup/finish
11874

    
11875
    # Remove instance if requested
11876
    if self.op.remove_instance:
11877
      feedback_fn("Removing instance %s" % instance.name)
11878
      _RemoveInstance(self, feedback_fn, instance,
11879
                      self.op.ignore_remove_failures)
11880

    
11881
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11882
      self._CleanupExports(feedback_fn)
11883

    
11884
    return fin_resu, dresults
11885

    
11886

    
11887
class LUBackupRemove(NoHooksLU):
11888
  """Remove exports related to the named instance.
11889

11890
  """
11891
  REQ_BGL = False
11892

    
11893
  def ExpandNames(self):
11894
    self.needed_locks = {}
11895
    # We need all nodes to be locked in order for RemoveExport to work, but we
11896
    # don't need to lock the instance itself, as nothing will happen to it (and
11897
    # we can remove exports also for a removed instance)
11898
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11899

    
11900
  def Exec(self, feedback_fn):
11901
    """Remove any export.
11902

11903
    """
11904
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11905
    # If the instance was not found we'll try with the name that was passed in.
11906
    # This will only work if it was an FQDN, though.
11907
    fqdn_warn = False
11908
    if not instance_name:
11909
      fqdn_warn = True
11910
      instance_name = self.op.instance_name
11911

    
11912
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11913
    exportlist = self.rpc.call_export_list(locked_nodes)
11914
    found = False
11915
    for node in exportlist:
11916
      msg = exportlist[node].fail_msg
11917
      if msg:
11918
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11919
        continue
11920
      if instance_name in exportlist[node].payload:
11921
        found = True
11922
        result = self.rpc.call_export_remove(node, instance_name)
11923
        msg = result.fail_msg
11924
        if msg:
11925
          logging.error("Could not remove export for instance %s"
11926
                        " on node %s: %s", instance_name, node, msg)
11927

    
11928
    if fqdn_warn and not found:
11929
      feedback_fn("Export not found. If trying to remove an export belonging"
11930
                  " to a deleted instance please use its Fully Qualified"
11931
                  " Domain Name.")
11932

    
11933

    
11934
class LUGroupAdd(LogicalUnit):
11935
  """Logical unit for creating node groups.
11936

11937
  """
11938
  HPATH = "group-add"
11939
  HTYPE = constants.HTYPE_GROUP
11940
  REQ_BGL = False
11941

    
11942
  def ExpandNames(self):
11943
    # We need the new group's UUID here so that we can create and acquire the
11944
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11945
    # that it should not check whether the UUID exists in the configuration.
11946
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11947
    self.needed_locks = {}
11948
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11949

    
11950
  def CheckPrereq(self):
11951
    """Check prerequisites.
11952

11953
    This checks that the given group name is not an existing node group
11954
    already.
11955

11956
    """
11957
    try:
11958
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11959
    except errors.OpPrereqError:
11960
      pass
11961
    else:
11962
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11963
                                 " node group (UUID: %s)" %
11964
                                 (self.op.group_name, existing_uuid),
11965
                                 errors.ECODE_EXISTS)
11966

    
11967
    if self.op.ndparams:
11968
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11969

    
11970
  def BuildHooksEnv(self):
11971
    """Build hooks env.
11972

11973
    """
11974
    return {
11975
      "GROUP_NAME": self.op.group_name,
11976
      }
11977

    
11978
  def BuildHooksNodes(self):
11979
    """Build hooks nodes.
11980

11981
    """
11982
    mn = self.cfg.GetMasterNode()
11983
    return ([mn], [mn])
11984

    
11985
  def Exec(self, feedback_fn):
11986
    """Add the node group to the cluster.
11987

11988
    """
11989
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11990
                                  uuid=self.group_uuid,
11991
                                  alloc_policy=self.op.alloc_policy,
11992
                                  ndparams=self.op.ndparams)
11993

    
11994
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11995
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11996

    
11997

    
11998
class LUGroupAssignNodes(NoHooksLU):
11999
  """Logical unit for assigning nodes to groups.
12000

12001
  """
12002
  REQ_BGL = False
12003

    
12004
  def ExpandNames(self):
12005
    # These raise errors.OpPrereqError on their own:
12006
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12007
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12008

    
12009
    # We want to lock all the affected nodes and groups. We have readily
12010
    # available the list of nodes, and the *destination* group. To gather the
12011
    # list of "source" groups, we need to fetch node information later on.
12012
    self.needed_locks = {
12013
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12014
      locking.LEVEL_NODE: self.op.nodes,
12015
      }
12016

    
12017
  def DeclareLocks(self, level):
12018
    if level == locking.LEVEL_NODEGROUP:
12019
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12020

    
12021
      # Try to get all affected nodes' groups without having the group or node
12022
      # lock yet. Needs verification later in the code flow.
12023
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12024

    
12025
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12026

    
12027
  def CheckPrereq(self):
12028
    """Check prerequisites.
12029

12030
    """
12031
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12032
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12033
            frozenset(self.op.nodes))
12034

    
12035
    expected_locks = (set([self.group_uuid]) |
12036
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12037
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12038
    if actual_locks != expected_locks:
12039
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12040
                               " current groups are '%s', used to be '%s'" %
12041
                               (utils.CommaJoin(expected_locks),
12042
                                utils.CommaJoin(actual_locks)))
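    # This is the verification announced in DeclareLocks: the group locks were
    # computed optimistically, so if any node moved to another group in the
    # meantime the lock set is stale and the operation is aborted.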
12043

    
12044
    self.node_data = self.cfg.GetAllNodesInfo()
12045
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12046
    instance_data = self.cfg.GetAllInstancesInfo()
12047

    
12048
    if self.group is None:
12049
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12050
                               (self.op.group_name, self.group_uuid))
12051

    
12052
    (new_splits, previous_splits) = \
12053
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12054
                                             for node in self.op.nodes],
12055
                                            self.node_data, instance_data)
12056

    
12057
    if new_splits:
12058
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12059

    
12060
      if not self.op.force:
12061
        raise errors.OpExecError("The following instances get split by this"
12062
                                 " change and --force was not given: %s" %
12063
                                 fmt_new_splits)
12064
      else:
12065
        self.LogWarning("This operation will split the following instances: %s",
12066
                        fmt_new_splits)
12067

    
12068
        if previous_splits:
12069
          self.LogWarning("In addition, these already-split instances continue"
12070
                          " to be split across groups: %s",
12071
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12072

    
12073
  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]

    self.cfg.AssignGroupNodes(mods)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)