lib/cmdlib.py @ fb926117
1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
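
# Illustrative sketch (the opcode list and the extra keyword below are made
# up): an LU's Exec method can hand follow-up work to the job queue with
#
#   return ResultWithJobs([[opcodes.OpClusterVerifyConfig()]],
#                         summary="verification scheduled")
#
# mcpu.Processor._ProcessResult then submits each inner list as one job and
# adds the job IDs, together with the extra keyword values, to the opcode
# result.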
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
137
    self.LogStep = processor.LogStep # pylint: disable=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensuring
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separately is better because:
160

161
      - ExpandNames is left as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possibly
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods no longer need to worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. If no nodes are to be returned, an
297
      empty list should be used (not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged, but any LU can override it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used to send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the "unused argument" and "could
324
    # be a function" warnings
325
    # pylint: disable=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instances' nodes, or
359
    to just lock primary or secondary nodes, if needed.
360

361
    It should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we have really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
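
# A minimal LogicalUnit subclass could look like the sketch below; the class
# name, the HPATH value and the trivial Exec body are purely illustrative:
#
#   class LUInstanceNoop(LogicalUnit):
#     HPATH = "instance-noop"
#     HTYPE = constants.HTYPE_INSTANCE
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
#
#     def BuildHooksEnv(self):
#       return _BuildInstanceHookEnvByObject(self, self.instance)
#
#     def BuildHooksNodes(self):
#       nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
#       return (nl, nl)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.instance.name)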
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
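
# An illustrative tasklet sketch (class and attribute names are made up); the
# owning LU handles all locking, the tasklet only checks and executes:
#
#   class _NoopTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance = self.cfg.GetInstanceInfo(self.instance_name)
#       if self.instance is None:
#         raise errors.OpPrereqError("Instance '%s' not known" %
#                                    self.instance_name, errors.ECODE_NOENT)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Would act on %s" % self.instance.name)
#
# The LU would typically create such objects in ExpandNames, e.g.
# self.tasklets = [_NoopTasklet(self, name) for name in instance_names].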
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
558

    
559

    
560
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561
  """Checks if the owned node groups are still correct for an instance.
562

563
  @type cfg: L{config.ConfigWriter}
564
  @param cfg: The cluster configuration
565
  @type instance_name: string
566
  @param instance_name: Instance name
567
  @type owned_groups: set or frozenset
568
  @param owned_groups: List of currently owned node groups
569

570
  """
571
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
572

    
573
  if not owned_groups.issuperset(inst_groups):
574
    raise errors.OpPrereqError("Instance %s's node groups changed since"
575
                               " locks were acquired, current groups are"
576
                               " are '%s', owning groups '%s'; retry the"
577
                               " operation" %
578
                               (instance_name,
579
                                utils.CommaJoin(inst_groups),
580
                                utils.CommaJoin(owned_groups)),
581
                               errors.ECODE_STATE)
582

    
583
  return inst_groups
584

    
585

    
586
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587
  """Checks if the instances in a node group are still correct.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type group_uuid: string
592
  @param group_uuid: Node group UUID
593
  @type owned_instances: set or frozenset
594
  @param owned_instances: List of currently owned instances
595

596
  """
597
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598
  if owned_instances != wanted_instances:
599
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
600
                               " locks were acquired, wanted '%s', have '%s';"
601
                               " retry the operation" %
602
                               (group_uuid,
603
                                utils.CommaJoin(wanted_instances),
604
                                utils.CommaJoin(owned_instances)),
605
                               errors.ECODE_STATE)
606

    
607
  return wanted_instances
608

    
609

    
610
def _SupportsOob(cfg, node):
611
  """Tells if node supports OOB.
612

613
  @type cfg: L{config.ConfigWriter}
614
  @param cfg: The cluster configuration
615
  @type node: L{objects.Node}
616
  @param node: The node
617
  @return: The OOB script if supported or an empty string otherwise
618

619
  """
620
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
621

    
622

    
623
def _GetWantedNodes(lu, nodes):
624
  """Returns list of checked and expanded node names.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type nodes: list
629
  @param nodes: list of node names or None for all nodes
630
  @rtype: list
631
  @return: the list of nodes, sorted
632
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
633

634
  """
635
  if nodes:
636
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
637

    
638
  return utils.NiceSort(lu.cfg.GetNodeList())
639

    
640

    
641
def _GetWantedInstances(lu, instances):
642
  """Returns list of checked and expanded instance names.
643

644
  @type lu: L{LogicalUnit}
645
  @param lu: the logical unit on whose behalf we execute
646
  @type instances: list
647
  @param instances: list of instance names or None for all instances
648
  @rtype: list
649
  @return: the list of instances, sorted
650
  @raise errors.OpPrereqError: if the instances parameter is wrong type
651
  @raise errors.OpPrereqError: if any of the passed instances is not found
652

653
  """
654
  if instances:
655
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
656
  else:
657
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
658
  return wanted
659

    
660

    
661
def _GetUpdatedParams(old_params, update_dict,
662
                      use_default=True, use_none=False):
663
  """Return the new version of a parameter dictionary.
664

665
  @type old_params: dict
666
  @param old_params: old parameters
667
  @type update_dict: dict
668
  @param update_dict: dict containing new parameter values, or
669
      constants.VALUE_DEFAULT to reset the parameter to its default
670
      value
671
  @type use_default: boolean
672
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673
      values as 'to be deleted' values
674
  @type use_none: boolean
675
  @param use_none: whether to recognise C{None} values as 'to be
676
      deleted' values
677
  @rtype: dict
678
  @return: the new parameter dictionary
679

680
  """
681
  params_copy = copy.deepcopy(old_params)
682
  for key, val in update_dict.iteritems():
683
    if ((use_default and val == constants.VALUE_DEFAULT) or
684
        (use_none and val is None)):
685
      try:
686
        del params_copy[key]
687
      except KeyError:
688
        pass
689
    else:
690
      params_copy[key] = val
691
  return params_copy
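
# Worked example (illustrative values):
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   # -> {"b": 2, "c": 3}
#
# "a" is removed (reset to its default), "c" is added and "b" is untouched.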
692

    
693

    
694
def _ReleaseLocks(lu, level, names=None, keep=None):
695
  """Releases locks owned by an LU.
696

697
  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf the locks are released
  @type level: member of ganeti.locking.LEVELS
  @param level: Lock level
699
  @type names: list or None
700
  @param names: Names of locks to release
701
  @type keep: list or None
702
  @param keep: Names of locks to retain
703

704
  """
705
  assert not (keep is not None and names is not None), \
706
         "Only one of the 'names' and the 'keep' parameters can be given"
707

    
708
  if names is not None:
709
    should_release = names.__contains__
710
  elif keep:
711
    should_release = lambda name: name not in keep
712
  else:
713
    should_release = None
714

    
715
  if should_release:
716
    retain = []
717
    release = []
718

    
719
    # Determine which locks to release
720
    for name in lu.owned_locks(level):
721
      if should_release(name):
722
        release.append(name)
723
      else:
724
        retain.append(name)
725

    
726
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
727

    
728
    # Release just some locks
729
    lu.glm.release(level, names=release)
730

    
731
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
732
  else:
733
    # Release everything
734
    lu.glm.release(level)
735

    
736
    assert not lu.glm.is_owned(level), "No locks should be owned"
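
# Typical calls from an LU (illustrative; the lock names are made up):
#
#   # keep only the locks of the instance's nodes
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=self.instance.all_nodes)
#   # release one specific node lock
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=["node3.example.com"])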
737

    
738

    
739
def _MapInstanceDisksToNodes(instances):
740
  """Creates a map from (node, volume) to instance name.
741

742
  @type instances: list of L{objects.Instance}
743
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
744

745
  """
746
  return dict(((node, vol), inst.name)
747
              for inst in instances
748
              for (node, vols) in inst.MapLVsByNode().items()
749
              for vol in vols)
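
# The returned mapping has this shape (node and volume names illustrative):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}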
750

    
751

    
752
def _RunPostHook(lu, node_name):
753
  """Runs the post-hook for an opcode on a single node.
754

755
  """
756
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
757
  try:
758
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
759
  except:
760
    # pylint: disable=W0702
761
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
762

    
763

    
764
def _CheckOutputFields(static, dynamic, selected):
765
  """Checks whether all selected fields are valid.
766

767
  @type static: L{utils.FieldSet}
768
  @param static: static fields set
769
  @type dynamic: L{utils.FieldSet}
770
  @param dynamic: dynamic fields set
771

772
  """
773
  f = utils.FieldSet()
774
  f.Extend(static)
775
  f.Extend(dynamic)
776

    
777
  delta = f.NonMatching(selected)
778
  if delta:
779
    raise errors.OpPrereqError("Unknown output fields selected: %s"
780
                               % ",".join(delta), errors.ECODE_INVAL)
781

    
782

    
783
def _CheckGlobalHvParams(params):
784
  """Validates that given hypervisor params are not global ones.
785

786
  This will ensure that instances don't get customised versions of
787
  global params.
788

789
  """
790
  used_globals = constants.HVC_GLOBALS.intersection(params)
791
  if used_globals:
792
    msg = ("The following hypervisor parameters are global and cannot"
793
           " be customized at instance level, please modify them at"
794
           " cluster level: %s" % utils.CommaJoin(used_globals))
795
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
796

    
797

    
798
def _CheckNodeOnline(lu, node, msg=None):
799
  """Ensure that a given node is online.
800

801
  @param lu: the LU on behalf of which we make the check
802
  @param node: the node to check
803
  @param msg: if passed, should be a message to replace the default one
804
  @raise errors.OpPrereqError: if the node is offline
805

806
  """
807
  if msg is None:
808
    msg = "Can't use offline node"
809
  if lu.cfg.GetNodeInfo(node).offline:
810
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
811

    
812

    
813
def _CheckNodeNotDrained(lu, node):
814
  """Ensure that a given node is not drained.
815

816
  @param lu: the LU on behalf of which we make the check
817
  @param node: the node to check
818
  @raise errors.OpPrereqError: if the node is drained
819

820
  """
821
  if lu.cfg.GetNodeInfo(node).drained:
822
    raise errors.OpPrereqError("Can't use drained node %s" % node,
823
                               errors.ECODE_STATE)
824

    
825

    
826
def _CheckNodeVmCapable(lu, node):
827
  """Ensure that a given node is vm capable.
828

829
  @param lu: the LU on behalf of which we make the check
830
  @param node: the node to check
831
  @raise errors.OpPrereqError: if the node is not vm capable
832

833
  """
834
  if not lu.cfg.GetNodeInfo(node).vm_capable:
835
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
836
                               errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeHasOS(lu, node, os_name, force_variant):
840
  """Ensure that a node supports a given OS.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @param os_name: the OS to query about
845
  @param force_variant: whether to ignore variant errors
846
  @raise errors.OpPrereqError: if the node is not supporting the OS
847

848
  """
849
  result = lu.rpc.call_os_get(node, os_name)
850
  result.Raise("OS '%s' not in supported OS list for node %s" %
851
               (os_name, node),
852
               prereq=True, ecode=errors.ECODE_INVAL)
853
  if not force_variant:
854
    _CheckOSVariant(result.payload, os_name)
855

    
856

    
857
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858
  """Ensure that a node has the given secondary ip.
859

860
  @type lu: L{LogicalUnit}
861
  @param lu: the LU on behalf of which we make the check
862
  @type node: string
863
  @param node: the node to check
864
  @type secondary_ip: string
865
  @param secondary_ip: the ip to check
866
  @type prereq: boolean
867
  @param prereq: whether to throw a prerequisite or an execute error
868
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
870

871
  """
872
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873
  result.Raise("Failure checking secondary ip on node %s" % node,
874
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
875
  if not result.payload:
876
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877
           " please fix and re-run this command" % secondary_ip)
878
    if prereq:
879
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
880
    else:
881
      raise errors.OpExecError(msg)
882

    
883

    
884
def _GetClusterDomainSecret():
885
  """Reads the cluster domain secret.
886

887
  """
888
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
889
                               strict=True)
890

    
891

    
892
def _CheckInstanceDown(lu, instance, reason):
893
  """Ensure that an instance is not running."""
894
  if instance.admin_up:
895
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896
                               (instance.name, reason), errors.ECODE_STATE)
897

    
898
  pnode = instance.primary_node
899
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
901
              prereq=True, ecode=errors.ECODE_ENVIRON)
902

    
903
  if instance.name in ins_l.payload:
904
    raise errors.OpPrereqError("Instance %s is running, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907

    
908
def _ExpandItemName(fn, name, kind):
909
  """Expand an item name.
910

911
  @param fn: the function to use for expansion
912
  @param name: requested item name
913
  @param kind: text description ('Node' or 'Instance')
914
  @return: the resolved (full) name
915
  @raise errors.OpPrereqError: if the item is not found
916

917
  """
918
  full_name = fn(name)
919
  if full_name is None:
920
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
921
                               errors.ECODE_NOENT)
922
  return full_name
923

    
924

    
925
def _ExpandNodeName(cfg, name):
926
  """Wrapper over L{_ExpandItemName} for nodes."""
927
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
928

    
929

    
930
def _ExpandInstanceName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for instance."""
932
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
933

    
934

    
935
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936
                          memory, vcpus, nics, disk_template, disks,
937
                          bep, hvp, hypervisor_name, tags):
938
  """Builds instance related env variables for hooks
939

940
  This builds the hook environment from individual variables.
941

942
  @type name: string
943
  @param name: the name of the instance
944
  @type primary_node: string
945
  @param primary_node: the name of the instance's primary node
946
  @type secondary_nodes: list
947
  @param secondary_nodes: list of secondary nodes as strings
948
  @type os_type: string
949
  @param os_type: the name of the instance's OS
950
  @type status: boolean
951
  @param status: the should_run status of the instance
952
  @type memory: string
953
  @param memory: the memory size of the instance
954
  @type vcpus: string
955
  @param vcpus: the count of VCPUs the instance has
956
  @type nics: list
957
  @param nics: list of tuples (ip, mac, mode, link) representing
958
      the NICs the instance has
959
  @type disk_template: string
960
  @param disk_template: the disk template of the instance
961
  @type disks: list
962
  @param disks: the list of (size, mode) pairs
963
  @type bep: dict
964
  @param bep: the backend parameters for the instance
965
  @type hvp: dict
966
  @param hvp: the hypervisor parameters for the instance
967
  @type hypervisor_name: string
968
  @param hypervisor_name: the hypervisor for the instance
969
  @type tags: list
970
  @param tags: list of instance tags as strings
971
  @rtype: dict
972
  @return: the hook environment for this instance
973

974
  """
975
  if status:
976
    str_status = "up"
977
  else:
978
    str_status = "down"
979
  env = {
980
    "OP_TARGET": name,
981
    "INSTANCE_NAME": name,
982
    "INSTANCE_PRIMARY": primary_node,
983
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984
    "INSTANCE_OS_TYPE": os_type,
985
    "INSTANCE_STATUS": str_status,
986
    "INSTANCE_MEMORY": memory,
987
    "INSTANCE_VCPUS": vcpus,
988
    "INSTANCE_DISK_TEMPLATE": disk_template,
989
    "INSTANCE_HYPERVISOR": hypervisor_name,
990
  }
991

    
992
  if nics:
993
    nic_count = len(nics)
994
    for idx, (ip, mac, mode, link) in enumerate(nics):
995
      if ip is None:
996
        ip = ""
997
      env["INSTANCE_NIC%d_IP" % idx] = ip
998
      env["INSTANCE_NIC%d_MAC" % idx] = mac
999
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1000
      env["INSTANCE_NIC%d_LINK" % idx] = link
1001
      if mode == constants.NIC_MODE_BRIDGED:
1002
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003
  else:
1004
    nic_count = 0
1005

    
1006
  env["INSTANCE_NIC_COUNT"] = nic_count
1007

    
1008
  if disks:
1009
    disk_count = len(disks)
1010
    for idx, (size, mode) in enumerate(disks):
1011
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1012
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1013
  else:
1014
    disk_count = 0
1015

    
1016
  env["INSTANCE_DISK_COUNT"] = disk_count
1017

    
1018
  if not tags:
1019
    tags = []
1020

    
1021
  env["INSTANCE_TAGS"] = " ".join(tags)
1022

    
1023
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024
    for key, value in source.items():
1025
      env["INSTANCE_%s_%s" % (kind, key)] = value
1026

    
1027
  return env
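
# The resulting environment looks roughly like this (values illustrative);
# the hooks runner later prefixes every key with "GANETI_":
#
#   {"OP_TARGET": "inst1.example.com",
#    "INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com",
#    "INSTANCE_SECONDARIES": "node2.example.com",
#    "INSTANCE_STATUS": "up",
#    "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_MAC": "aa:00:00:12:34:56",
#    "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 10240,
#    "INSTANCE_DISK0_MODE": "rw",
#    ...}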
1028

    
1029

    
1030
def _NICListToTuple(lu, nics):
1031
  """Build a list of nic information tuples.
1032

1033
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034
  value in LUInstanceQueryData.
1035

1036
  @type lu:  L{LogicalUnit}
1037
  @param lu: the logical unit on whose behalf we execute
1038
  @type nics: list of L{objects.NIC}
1039
  @param nics: list of nics to convert to hooks tuples
1040

1041
  """
1042
  hooks_nics = []
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  for nic in nics:
1045
    ip = nic.ip
1046
    mac = nic.mac
1047
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048
    mode = filled_params[constants.NIC_MODE]
1049
    link = filled_params[constants.NIC_LINK]
1050
    hooks_nics.append((ip, mac, mode, link))
1051
  return hooks_nics
1052

    
1053

    
1054
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055
  """Builds instance related env variables for hooks from an object.
1056

1057
  @type lu: L{LogicalUnit}
1058
  @param lu: the logical unit on whose behalf we execute
1059
  @type instance: L{objects.Instance}
1060
  @param instance: the instance for which we should build the
1061
      environment
1062
  @type override: dict
1063
  @param override: dictionary with key/values that will override
1064
      our values
1065
  @rtype: dict
1066
  @return: the hook environment dictionary
1067

1068
  """
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  bep = cluster.FillBE(instance)
1071
  hvp = cluster.FillHV(instance)
1072
  args = {
1073
    "name": instance.name,
1074
    "primary_node": instance.primary_node,
1075
    "secondary_nodes": instance.secondary_nodes,
1076
    "os_type": instance.os,
1077
    "status": instance.admin_up,
1078
    "memory": bep[constants.BE_MEMORY],
1079
    "vcpus": bep[constants.BE_VCPUS],
1080
    "nics": _NICListToTuple(lu, instance.nics),
1081
    "disk_template": instance.disk_template,
1082
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083
    "bep": bep,
1084
    "hvp": hvp,
1085
    "hypervisor_name": instance.hypervisor,
1086
    "tags": instance.tags,
1087
  }
1088
  if override:
1089
    args.update(override)
1090
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091

    
1092

    
1093
def _AdjustCandidatePool(lu, exceptions):
1094
  """Adjust the candidate pool after node operations.
1095

1096
  """
1097
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098
  if mod_list:
1099
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1100
               utils.CommaJoin(node.name for node in mod_list))
1101
    for name in mod_list:
1102
      lu.context.ReaddNode(name)
1103
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104
  if mc_now > mc_max:
1105
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106
               (mc_now, mc_max))
1107

    
1108

    
1109
def _DecideSelfPromotion(lu, exceptions=None):
1110
  """Decide whether I should promote myself as a master candidate.
1111

1112
  """
1113
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115
  # the new node will increase mc_max by one, so:
1116
  mc_should = min(mc_should + 1, cp_size)
1117
  return mc_now < mc_should
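
# Example with illustrative numbers: candidate_pool_size=10, mc_now=3 and a
# recommended count of 4 give mc_should = min(4 + 1, 10) = 5, so 3 < 5 and
# the new node should promote itself.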
1118

    
1119

    
1120
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121
  """Check that the brigdes needed by a list of nics exist.
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
1135
  """Check that the brigdes needed by an instance exist.
1136

1137
  """
1138
  if node is None:
1139
    node = instance.primary_node
1140
  _CheckNicsBridgesExist(lu, instance.nics, node)
1141

    
1142

    
1143
def _CheckOSVariant(os_obj, name):
1144
  """Check whether an OS name conforms to the os variants specification.
1145

1146
  @type os_obj: L{objects.OS}
1147
  @param os_obj: OS object to check
1148
  @type name: string
1149
  @param name: OS name passed by the user, to check for validity
1150

1151
  """
1152
  variant = objects.OS.GetVariant(name)
1153
  if not os_obj.supported_variants:
1154
    if variant:
1155
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156
                                 " passed)" % (os_obj.name, variant),
1157
                                 errors.ECODE_INVAL)
1158
    return
1159
  if not variant:
1160
    raise errors.OpPrereqError("OS name must include a variant",
1161
                               errors.ECODE_INVAL)
1162

    
1163
  if variant not in os_obj.supported_variants:
1164
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165

    
1166

    
1167
def _GetNodeInstancesInner(cfg, fn):
1168
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169

    
1170

    
1171
def _GetNodeInstances(cfg, node_name):
1172
  """Returns a list of all primary and secondary instances on a node.
1173

1174
  """
1175

    
1176
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177

    
1178

    
1179
def _GetNodePrimaryInstances(cfg, node_name):
1180
  """Returns primary instances on a node.
1181

1182
  """
1183
  return _GetNodeInstancesInner(cfg,
1184
                                lambda inst: node_name == inst.primary_node)
1185

    
1186

    
1187
def _GetNodeSecondaryInstances(cfg, node_name):
1188
  """Returns secondary instances on a node.
1189

1190
  """
1191
  return _GetNodeInstancesInner(cfg,
1192
                                lambda inst: node_name in inst.secondary_nodes)
1193

    
1194

    
1195
def _GetStorageTypeArgs(cfg, storage_type):
1196
  """Returns the arguments for a storage type.
1197

1198
  """
1199
  # Special case for file storage
1200
  if storage_type == constants.ST_FILE:
1201
    # storage.FileStorage wants a list of storage directories
1202
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203

    
1204
  return []
1205

    
1206

    
1207
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
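  """Returns the indices of an instance's faulty disks on a node.

  Sets the disk IDs for the given node, queries the block device mirror
  status via RPC and collects the indices of all disks whose local disk
  status is C{constants.LDS_FAULTY}.

  """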
1208
  faulty = []
1209

    
1210
  for dev in instance.disks:
1211
    cfg.SetDiskID(dev, node_name)
1212

    
1213
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214
  result.Raise("Failed to get disk status from node %s" % node_name,
1215
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216

    
1217
  for idx, bdev_status in enumerate(result.payload):
1218
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219
      faulty.append(idx)
1220

    
1221
  return faulty
1222

    
1223

    
1224
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225
  """Check the sanity of iallocator and node arguments and use the
1226
  cluster-wide iallocator if appropriate.
1227

1228
  Check that at most one of (iallocator, node) is specified. If none is
1229
  specified, then the LU's opcode's iallocator slot is filled with the
1230
  cluster-wide default iallocator.
1231

1232
  @type iallocator_slot: string
1233
  @param iallocator_slot: the name of the opcode iallocator slot
1234
  @type node_slot: string
1235
  @param node_slot: the name of the opcode target node slot
1236

1237
  """
1238
  node = getattr(lu.op, node_slot, None)
1239
  iallocator = getattr(lu.op, iallocator_slot, None)
1240

    
1241
  if node is not None and iallocator is not None:
1242
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243
                               errors.ECODE_INVAL)
1244
  elif node is None and iallocator is None:
1245
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1246
    if default_iallocator:
1247
      setattr(lu.op, iallocator_slot, default_iallocator)
1248
    else:
1249
      raise errors.OpPrereqError("No iallocator or node given and no"
1250
                                 " cluster-wide default iallocator found;"
1251
                                 " please specify either an iallocator or a"
1252
                                 " node, or set a cluster-wide default"
1253
                                 " iallocator")
1254

    
1255

    
1256
def _GetDefaultIAllocator(cfg, iallocator):
1257
  """Decides on which iallocator to use.
1258

1259
  @type cfg: L{config.ConfigWriter}
1260
  @param cfg: Cluster configuration object
1261
  @type iallocator: string or None
1262
  @param iallocator: Iallocator specified in opcode
1263
  @rtype: string
1264
  @return: Iallocator name
1265

1266
  """
1267
  if not iallocator:
1268
    # Use default iallocator
1269
    iallocator = cfg.GetDefaultIAllocator()
1270

    
1271
  if not iallocator:
1272
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273
                               " opcode nor as a cluster-wide default",
1274
                               errors.ECODE_INVAL)
1275

    
1276
  return iallocator
1277

    
1278

    
1279
class LUClusterPostInit(LogicalUnit):
1280
  """Logical unit for running hooks after cluster initialization.
1281

1282
  """
1283
  HPATH = "cluster-init"
1284
  HTYPE = constants.HTYPE_CLUSTER
1285

    
1286
  def BuildHooksEnv(self):
1287
    """Build hooks env.
1288

1289
    """
1290
    return {
1291
      "OP_TARGET": self.cfg.GetClusterName(),
1292
      }
1293

    
1294
  def BuildHooksNodes(self):
1295
    """Build hooks nodes.
1296

1297
    """
1298
    return ([], [self.cfg.GetMasterNode()])
1299

    
1300
  def Exec(self, feedback_fn):
1301
    """Nothing to do.
1302

1303
    """
1304
    return True
1305

    
1306

    
1307
class LUClusterDestroy(LogicalUnit):
1308
  """Logical unit for destroying the cluster.
1309

1310
  """
1311
  HPATH = "cluster-destroy"
1312
  HTYPE = constants.HTYPE_CLUSTER
1313

    
1314
  def BuildHooksEnv(self):
1315
    """Build hooks env.
1316

1317
    """
1318
    return {
1319
      "OP_TARGET": self.cfg.GetClusterName(),
1320
      }
1321

    
1322
  def BuildHooksNodes(self):
1323
    """Build hooks nodes.
1324

1325
    """
1326
    return ([], [])
1327

    
1328
  def CheckPrereq(self):
1329
    """Check prerequisites.
1330

1331
    This checks whether the cluster is empty.
1332

1333
    Any errors are signaled by raising errors.OpPrereqError.
1334

1335
    """
1336
    master = self.cfg.GetMasterNode()
1337

    
1338
    nodelist = self.cfg.GetNodeList()
1339
    if len(nodelist) != 1 or nodelist[0] != master:
1340
      raise errors.OpPrereqError("There are still %d node(s) in"
1341
                                 " this cluster." % (len(nodelist) - 1),
1342
                                 errors.ECODE_INVAL)
1343
    instancelist = self.cfg.GetInstanceList()
1344
    if instancelist:
1345
      raise errors.OpPrereqError("There are still %d instance(s) in"
1346
                                 " this cluster." % len(instancelist),
1347
                                 errors.ECODE_INVAL)
1348

    
1349
  def Exec(self, feedback_fn):
1350
    """Destroys the cluster.
1351

1352
    """
1353
    master = self.cfg.GetMasterNode()
1354

    
1355
    # Run post hooks on master node before it's removed
1356
    _RunPostHook(self, master)
1357

    
1358
    result = self.rpc.call_node_deactivate_master_ip(master)
1359
    result.Raise("Could not disable the master role")
1360

    
1361
    return master
1362

    
1363

    
1364
def _VerifyCertificate(filename):
1365
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1366

1367
  @type filename: string
1368
  @param filename: Path to PEM file
1369

1370
  """
1371
  try:
1372
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373
                                           utils.ReadFile(filename))
1374
  except Exception, err: # pylint: disable=W0703
1375
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1376
            "Failed to load X509 certificate %s: %s" % (filename, err))
1377

    
1378
  (errcode, msg) = \
1379
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380
                                constants.SSL_CERT_EXPIRATION_ERROR)
1381

    
1382
  if msg:
1383
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1384
  else:
1385
    fnamemsg = None
1386

    
1387
  if errcode is None:
1388
    return (None, fnamemsg)
1389
  elif errcode == utils.CERT_WARNING:
1390
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391
  elif errcode == utils.CERT_ERROR:
1392
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393

    
1394
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395

    
1396

    
1397
def _GetAllHypervisorParameters(cluster, instances):
1398
  """Compute the set of all hypervisor parameters.
1399

1400
  @type cluster: L{objects.Cluster}
1401
  @param cluster: the cluster object
1402
  @param instances: list of L{objects.Instance}
1403
  @param instances: additional instances from which to obtain parameters
1404
  @rtype: list of (origin, hypervisor, parameters)
1405
  @return: a list with all parameters found, indicating the hypervisor they
1406
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407

1408
  """
1409
  hvp_data = []
1410

    
1411
  for hv_name in cluster.enabled_hypervisors:
1412
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413

    
1414
  for os_name, os_hvp in cluster.os_hvp.items():
1415
    for hv_name, hv_params in os_hvp.items():
1416
      if hv_params:
1417
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419

    
1420
  # TODO: collapse identical parameter values in a single one
1421
  for instance in instances:
1422
    if instance.hvparams:
1423
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424
                       cluster.FillHV(instance)))
1425

    
1426
  return hvp_data
1427

    
1428

    
1429
class _VerifyErrors(object):
1430
  """Mix-in for cluster/group verify LUs.
1431

1432
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433
  self.op and self._feedback_fn to be available.)
1434

1435
  """
1436
  TCLUSTER = "cluster"
1437
  TNODE = "node"
1438
  TINSTANCE = "instance"
1439

    
1440
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452
  ENODEDRBD = (TNODE, "ENODEDRBD")
1453
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456
  ENODEHV = (TNODE, "ENODEHV")
1457
  ENODELVM = (TNODE, "ENODELVM")
1458
  ENODEN1 = (TNODE, "ENODEN1")
1459
  ENODENET = (TNODE, "ENODENET")
1460
  ENODEOS = (TNODE, "ENODEOS")
1461
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463
  ENODERPC = (TNODE, "ENODERPC")
1464
  ENODESSH = (TNODE, "ENODESSH")
1465
  ENODEVERSION = (TNODE, "ENODEVERSION")
1466
  ENODESETUP = (TNODE, "ENODESETUP")
1467
  ENODETIME = (TNODE, "ENODETIME")
1468
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469

    
1470
  ETYPE_FIELD = "code"
1471
  ETYPE_ERROR = "ERROR"
1472
  ETYPE_WARNING = "WARNING"
1473

    
1474
  def _Error(self, ecode, item, msg, *args, **kwargs):
1475
    """Format an error message.
1476

1477
    Based on the opcode's error_codes parameter, either format a
1478
    parseable error code, or a simpler error string.
1479

1480
    This must be called only from Exec and functions called from Exec.
1481

1482
    """
1483
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484
    itype, etxt = ecode
1485
    # first complete the msg
1486
    if args:
1487
      msg = msg % args
1488
    # then format the whole message
1489
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1490
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491
    else:
1492
      if item:
1493
        item = " " + item
1494
      else:
1495
        item = ""
1496
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497
    # and finally report it via the feedback_fn
1498
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1499

    
1500
  def _ErrorIf(self, cond, *args, **kwargs):
1501
    """Log an error message if the passed condition is True.
1502

1503
    """
1504
    cond = (bool(cond)
1505
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1506
    if cond:
1507
      self._Error(*args, **kwargs)
1508
    # do not mark the operation as failed when the message is only a warning
1509
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510
      self.bad = self.bad or cond
1511

    
1512

    
1513
class LUClusterVerify(NoHooksLU):
1514
  """Submits all jobs necessary to verify the cluster.
1515

1516
  """
1517
  REQ_BGL = False
1518

    
1519
  def ExpandNames(self):
1520
    self.needed_locks = {}
1521

    
1522
  def Exec(self, feedback_fn):
1523
    jobs = []
1524

    
1525
    if self.op.group_name:
1526
      groups = [self.op.group_name]
1527
      depends_fn = lambda: None
1528
    else:
1529
      groups = self.cfg.GetNodeGroupList()
1530

    
1531
      # Verify global configuration
1532
      jobs.append([opcodes.OpClusterVerifyConfig()])
1533

    
1534
      # Always depend on global verification
1535
      depends_fn = lambda: [(-len(jobs), [])]
1536

    
1537
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1538
                                              depends=depends_fn())]
1539
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
             ntime_diff)
1892

    
1893
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1894
    """Check the node LVM results.
1895

1896
    @type ninfo: L{objects.Node}
1897
    @param ninfo: the node to check
1898
    @param nresult: the remote results for the node
1899
    @param vg_name: the configured VG name
1900

1901
    """
1902
    if vg_name is None:
1903
      return
1904

    
1905
    node = ninfo.name
1906
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1907

    
1908
    # checks vg existence and size > 20G
1909
    vglist = nresult.get(constants.NV_VGLIST, None)
1910
    test = not vglist
1911
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1912
    if not test:
1913
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1914
                                            constants.MIN_VG_SIZE)
1915
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1916

    
1917
    # check pv names
1918
    pvlist = nresult.get(constants.NV_PVLIST, None)
1919
    test = pvlist is None
1920
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1921
    if not test:
1922
      # check that ':' is not present in PV names, since it's a
1923
      # special character for lvcreate (denotes the range of PEs to
1924
      # use on the PV)
1925
      for _, pvname, owner_vg in pvlist:
1926
        test = ":" in pvname
1927
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1928
                 " '%s' of VG '%s'", pvname, owner_vg)
1929

    
1930
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1931
    """Check the node bridges.
1932

1933
    @type ninfo: L{objects.Node}
1934
    @param ninfo: the node to check
1935
    @param nresult: the remote results for the node
1936
    @param bridges: the expected list of bridges
1937

1938
    """
1939
    if not bridges:
1940
      return
1941

    
1942
    node = ninfo.name
1943
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1944

    
1945
    missing = nresult.get(constants.NV_BRIDGES, None)
1946
    test = not isinstance(missing, list)
1947
    _ErrorIf(test, self.ENODENET, node,
1948
             "did not return valid bridge information")
1949
    if not test:
1950
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1951
               utils.CommaJoin(sorted(missing)))
1952

    
1953
  def _VerifyNodeNetwork(self, ninfo, nresult):
1954
    """Check the node network connectivity results.
1955

1956
    @type ninfo: L{objects.Node}
1957
    @param ninfo: the node to check
1958
    @param nresult: the remote results for the node
1959

1960
    """
1961
    node = ninfo.name
1962
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1963

    
1964
    test = constants.NV_NODELIST not in nresult
1965
    _ErrorIf(test, self.ENODESSH, node,
1966
             "node hasn't returned node ssh connectivity data")
1967
    if not test:
1968
      if nresult[constants.NV_NODELIST]:
1969
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1970
          _ErrorIf(True, self.ENODESSH, node,
1971
                   "ssh communication with node '%s': %s", a_node, a_msg)
1972

    
1973
    test = constants.NV_NODENETTEST not in nresult
1974
    _ErrorIf(test, self.ENODENET, node,
1975
             "node hasn't returned node tcp connectivity data")
1976
    if not test:
1977
      if nresult[constants.NV_NODENETTEST]:
1978
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1979
        for anode in nlist:
1980
          _ErrorIf(True, self.ENODENET, node,
1981
                   "tcp communication with node '%s': %s",
1982
                   anode, nresult[constants.NV_NODENETTEST][anode])
1983

    
1984
    test = constants.NV_MASTERIP not in nresult
1985
    _ErrorIf(test, self.ENODENET, node,
1986
             "node hasn't returned node master IP reachability data")
1987
    if not test:
1988
      if not nresult[constants.NV_MASTERIP]:
1989
        if node == self.master_node:
1990
          msg = "the master node cannot reach the master IP (not configured?)"
1991
        else:
1992
          msg = "cannot reach the master IP"
1993
        _ErrorIf(True, self.ENODENET, node, msg)
1994

    
1995
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1996
                      diskstatus):
1997
    """Verify an instance.
1998

1999
    This function checks to see if the required block devices are
2000
    available on the instance's node.
2001

2002
    """
2003
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2004
    node_current = instanceconfig.primary_node
2005

    
2006
    node_vol_should = {}
2007
    instanceconfig.MapLVsByNode(node_vol_should)
2008

    
2009
    for node in node_vol_should:
2010
      n_img = node_image[node]
2011
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2012
        # ignore missing volumes on offline or broken nodes
2013
        continue
2014
      for volume in node_vol_should[node]:
2015
        test = volume not in n_img.volumes
2016
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2017
                 "volume %s missing on node %s", volume, node)
2018

    
2019
    if instanceconfig.admin_up:
2020
      pri_img = node_image[node_current]
2021
      test = instance not in pri_img.instances and not pri_img.offline
2022
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
2023
               "instance not running on its primary node %s",
2024
               node_current)
2025

    
2026
    diskdata = [(nname, success, status, idx)
2027
                for (nname, disks) in diskstatus.items()
2028
                for idx, (success, status) in enumerate(disks)]
2029

    
2030
    for nname, success, bdev_status, idx in diskdata:
2031
      # the 'ghost node' construction in Exec() ensures that we have a
2032
      # node here
2033
      snode = node_image[nname]
2034
      bad_snode = snode.ghost or snode.offline
2035
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2036
               self.EINSTANCEFAULTYDISK, instance,
2037
               "couldn't retrieve status for disk/%s on %s: %s",
2038
               idx, nname, bdev_status)
2039
      _ErrorIf((instanceconfig.admin_up and success and
2040
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2041
               self.EINSTANCEFAULTYDISK, instance,
2042
               "disk/%s on %s is faulty", idx, nname)
2043

    
2044
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2045
    """Verify if there are any unknown volumes in the cluster.
2046

2047
    The .os, .swap and backup volumes are ignored. All other volumes are
2048
    reported as unknown.
2049

2050
    @type reserved: L{ganeti.utils.FieldSet}
2051
    @param reserved: a FieldSet of reserved volume names
2052

2053
    """
2054
    for node, n_img in node_image.items():
2055
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2056
        # skip non-healthy nodes
2057
        continue
2058
      for volume in n_img.volumes:
2059
        test = ((node not in node_vol_should or
2060
                volume not in node_vol_should[node]) and
2061
                not reserved.Matches(volume))
2062
        self._ErrorIf(test, self.ENODEORPHANLV, node,
2063
                      "volume %s is unknown", volume)
2064

    
2065
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2066
    """Verify N+1 Memory Resilience.
2067

2068
    Check that if one single node dies we can still start all the
2069
    instances it was primary for.
2070

2071
    """
2072
    cluster_info = self.cfg.GetClusterInfo()
2073
    for node, n_img in node_image.items():
2074
      # This code checks that every node which is now listed as
2075
      # secondary has enough memory to host all instances it is
2076
      # supposed to should a single other node in the cluster fail.
2077
      # FIXME: not ready for failover to an arbitrary node
2078
      # FIXME: does not support file-backed instances
2079
      # WARNING: we currently take into account down instances as well
2080
      # as up ones, considering that even if they're down someone
2081
      # might want to start them even in the event of a node failure.
2082
      if n_img.offline:
2083
        # we're skipping offline nodes from the N+1 warning, since
2084
        # most likely we don't have good memory infromation from them;
2085
        # we already list instances living on such nodes, and that's
2086
        # enough warning
2087
        continue
2088
      for prinode, instances in n_img.sbp.items():
2089
        needed_mem = 0
2090
        for instance in instances:
2091
          bep = cluster_info.FillBE(instance_cfg[instance])
2092
          if bep[constants.BE_AUTO_BALANCE]:
2093
            needed_mem += bep[constants.BE_MEMORY]
2094
        test = n_img.mfree < needed_mem
2095
        self._ErrorIf(test, self.ENODEN1, node,
2096
                      "not enough memory to accomodate instance failovers"
2097
                      " should node %s fail (%dMiB needed, %dMiB available)",
2098
                      prinode, needed_mem, n_img.mfree)
2099

    
2100
  @classmethod
2101
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2102
                   (files_all, files_all_opt, files_mc, files_vm)):
2103
    """Verifies file checksums collected from all nodes.
2104

2105
    @param errorif: Callback for reporting errors
2106
    @param nodeinfo: List of L{objects.Node} objects
2107
    @param master_node: Name of master node
2108
    @param all_nvinfo: RPC results
2109

2110
    """
2111
    node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2112

    
2113
    assert master_node in node_names
2114
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2115
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2116
           "Found file listed in more than one file list"
2117

    
2118
    # Define functions determining which nodes to consider for a file
2119
    file2nodefn = dict([(filename, fn)
2120
      for (files, fn) in [(files_all, None),
2121
                          (files_all_opt, None),
2122
                          (files_mc, lambda node: (node.master_candidate or
2123
                                                   node.name == master_node)),
2124
                          (files_vm, lambda node: node.vm_capable)]
2125
      for filename in files])
2126

    
2127
    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2128

    
2129
    for node in nodeinfo:
2130
      if node.offline:
2131
        continue
2132

    
2133
      nresult = all_nvinfo[node.name]
2134

    
2135
      if nresult.fail_msg or not nresult.payload:
2136
        node_files = None
2137
      else:
2138
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2139

    
2140
      test = not (node_files and isinstance(node_files, dict))
2141
      errorif(test, cls.ENODEFILECHECK, node.name,
2142
              "Node did not return file checksum data")
2143
      if test:
2144
        continue
2145

    
2146
      for (filename, checksum) in node_files.items():
2147
        # Check if the file should be considered for a node
2148
        fn = file2nodefn[filename]
2149
        if fn is None or fn(node):
2150
          fileinfo[filename].setdefault(checksum, set()).add(node.name)
2151

    
2152
    for (filename, checksums) in fileinfo.items():
2153
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2154

    
2155
      # Nodes having the file
2156
      with_file = frozenset(node_name
2157
                            for nodes in fileinfo[filename].values()
2158
                            for node_name in nodes)
2159

    
2160
      # Nodes missing file
2161
      missing_file = node_names - with_file
2162

    
2163
      if filename in files_all_opt:
2164
        # All or no nodes
2165
        errorif(missing_file and missing_file != node_names,
2166
                cls.ECLUSTERFILECHECK, None,
2167
                "File %s is optional, but it must exist on all or no"
2168
                " nodes (not found on %s)",
2169
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2170
      else:
2171
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2172
                "File %s is missing from node(s) %s", filename,
2173
                utils.CommaJoin(utils.NiceSort(missing_file)))
2174

    
2175
      # See if there are multiple versions of the file
2176
      test = len(checksums) > 1
2177
      if test:
2178
        variants = ["variant %s on %s" %
2179
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2180
                    for (idx, (checksum, nodes)) in
2181
                      enumerate(sorted(checksums.items()))]
2182
      else:
2183
        variants = []
2184

    
2185
      errorif(test, cls.ECLUSTERFILECHECK, None,
2186
              "File %s found with %s different checksums (%s)",
2187
              filename, len(checksums), "; ".join(variants))
2188

    
2189
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2190
                      drbd_map):
2191
    """Verifies and the node DRBD status.
2192

2193
    @type ninfo: L{objects.Node}
2194
    @param ninfo: the node to check
2195
    @param nresult: the remote results for the node
2196
    @param instanceinfo: the dict of instances
2197
    @param drbd_helper: the configured DRBD usermode helper
2198
    @param drbd_map: the DRBD map as returned by
2199
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2200

2201
    """
2202
    node = ninfo.name
2203
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2204

    
2205
    if drbd_helper:
2206
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2207
      test = (helper_result == None)
2208
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2209
               "no drbd usermode helper returned")
2210
      if helper_result:
2211
        status, payload = helper_result
2212
        test = not status
2213
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2214
                 "drbd usermode helper check unsuccessful: %s", payload)
2215
        test = status and (payload != drbd_helper)
2216
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2217
                 "wrong drbd usermode helper: %s", payload)
2218

    
2219
    # compute the DRBD minors
2220
    node_drbd = {}
2221
    for minor, instance in drbd_map[node].items():
2222
      test = instance not in instanceinfo
2223
      _ErrorIf(test, self.ECLUSTERCFG, None,
2224
               "ghost instance '%s' in temporary DRBD map", instance)
2225
        # ghost instance should not be running, but otherwise we
2226
        # don't give double warnings (both ghost instance and
2227
        # unallocated minor in use)
2228
      if test:
2229
        node_drbd[minor] = (instance, False)
2230
      else:
2231
        instance = instanceinfo[instance]
2232
        node_drbd[minor] = (instance.name, instance.admin_up)
2233

    
2234
    # and now check them
2235
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2236
    test = not isinstance(used_minors, (tuple, list))
2237
    _ErrorIf(test, self.ENODEDRBD, node,
2238
             "cannot parse drbd status file: %s", str(used_minors))
2239
    if test:
2240
      # we cannot check drbd status
2241
      return
2242

    
2243
    for minor, (iname, must_exist) in node_drbd.items():
2244
      test = minor not in used_minors and must_exist
2245
      _ErrorIf(test, self.ENODEDRBD, node,
2246
               "drbd minor %d of instance %s is not active", minor, iname)
2247
    for minor in used_minors:
2248
      test = minor not in node_drbd
2249
      _ErrorIf(test, self.ENODEDRBD, node,
2250
               "unallocated drbd minor %d is in use", minor)
2251

    
2252
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2253
    """Builds the node OS structures.
2254

2255
    @type ninfo: L{objects.Node}
2256
    @param ninfo: the node to check
2257
    @param nresult: the remote results for the node
2258
    @param nimg: the node image object
2259

2260
    """
2261
    node = ninfo.name
2262
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2263

    
2264
    remote_os = nresult.get(constants.NV_OSLIST, None)
2265
    test = (not isinstance(remote_os, list) or
2266
            not compat.all(isinstance(v, list) and len(v) == 7
2267
                           for v in remote_os))
2268

    
2269
    _ErrorIf(test, self.ENODEOS, node,
2270
             "node hasn't returned valid OS data")
2271

    
2272
    nimg.os_fail = test
2273

    
2274
    if test:
2275
      return
2276

    
2277
    os_dict = {}
2278

    
2279
    for (name, os_path, status, diagnose,
2280
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2281

    
2282
      if name not in os_dict:
2283
        os_dict[name] = []
2284

    
2285
      # parameters is a list of lists instead of list of tuples due to
2286
      # JSON lacking a real tuple type, fix it:
2287
      parameters = [tuple(v) for v in parameters]
2288
      os_dict[name].append((os_path, status, diagnose,
2289
                            set(variants), set(parameters), set(api_ver)))
2290

    
2291
    nimg.oslist = os_dict
2292

    
2293
  def _VerifyNodeOS(self, ninfo, nimg, base):
2294
    """Verifies the node OS list.
2295

2296
    @type ninfo: L{objects.Node}
2297
    @param ninfo: the node to check
2298
    @param nimg: the node image object
2299
    @param base: the 'template' node we match against (e.g. from the master)
2300

2301
    """
2302
    node = ninfo.name
2303
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2304

    
2305
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2306

    
2307
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2308
    for os_name, os_data in nimg.oslist.items():
2309
      assert os_data, "Empty OS status for OS %s?!" % os_name
2310
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2311
      _ErrorIf(not f_status, self.ENODEOS, node,
2312
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2313
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2314
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2315
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2316
      # comparisons with the 'base' image
2317
      test = os_name not in base.oslist
2318
      _ErrorIf(test, self.ENODEOS, node,
2319
               "Extra OS %s not present on reference node (%s)",
2320
               os_name, base.name)
2321
      if test:
2322
        continue
2323
      assert base.oslist[os_name], "Base node has empty OS status?"
2324
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2325
      if not b_status:
2326
        # base OS is invalid, skipping
2327
        continue
2328
      for kind, a, b in [("API version", f_api, b_api),
2329
                         ("variants list", f_var, b_var),
2330
                         ("parameters", beautify_params(f_param),
2331
                          beautify_params(b_param))]:
2332
        _ErrorIf(a != b, self.ENODEOS, node,
2333
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2334
                 kind, os_name, base.name,
2335
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2336

    
2337
    # check any missing OSes
2338
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2339
    _ErrorIf(missing, self.ENODEOS, node,
2340
             "OSes present on reference node %s but missing on this node: %s",
2341
             base.name, utils.CommaJoin(missing))
2342

    
2343
  def _VerifyOob(self, ninfo, nresult):
2344
    """Verifies out of band functionality of a node.
2345

2346
    @type ninfo: L{objects.Node}
2347
    @param ninfo: the node to check
2348
    @param nresult: the remote results for the node
2349

2350
    """
2351
    node = ninfo.name
2352
    # We just have to verify the paths on master and/or master candidates
2353
    # as the oob helper is invoked on the master
2354
    if ((ninfo.master_candidate or ninfo.master_capable) and
2355
        constants.NV_OOB_PATHS in nresult):
2356
      for path_result in nresult[constants.NV_OOB_PATHS]:
2357
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2358

    
2359
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2360
    """Verifies and updates the node volume data.
2361

2362
    This function will update a L{NodeImage}'s internal structures
2363
    with data from the remote call.
2364

2365
    @type ninfo: L{objects.Node}
2366
    @param ninfo: the node to check
2367
    @param nresult: the remote results for the node
2368
    @param nimg: the node image object
2369
    @param vg_name: the configured VG name
2370

2371
    """
2372
    node = ninfo.name
2373
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374

    
2375
    nimg.lvm_fail = True
2376
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2377
    if vg_name is None:
2378
      pass
2379
    elif isinstance(lvdata, basestring):
2380
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2381
               utils.SafeEncode(lvdata))
2382
    elif not isinstance(lvdata, dict):
2383
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2384
    else:
2385
      nimg.volumes = lvdata
2386
      nimg.lvm_fail = False
2387

    
2388
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2389
    """Verifies and updates the node instance list.
2390

2391
    If the listing was successful, then updates this node's instance
2392
    list. Otherwise, it marks the RPC call as failed for the instance
2393
    list key.
2394

2395
    @type ninfo: L{objects.Node}
2396
    @param ninfo: the node to check
2397
    @param nresult: the remote results for the node
2398
    @param nimg: the node image object
2399

2400
    """
2401
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2402
    test = not isinstance(idata, list)
2403
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2404
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2405
    if test:
2406
      nimg.hyp_fail = True
2407
    else:
2408
      nimg.instances = idata
2409

    
2410
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2411
    """Verifies and computes a node information map
2412

2413
    @type ninfo: L{objects.Node}
2414
    @param ninfo: the node to check
2415
    @param nresult: the remote results for the node
2416
    @param nimg: the node image object
2417
    @param vg_name: the configured VG name
2418

2419
    """
2420
    node = ninfo.name
2421
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2422

    
2423
    # try to read free memory (from the hypervisor)
2424
    hv_info = nresult.get(constants.NV_HVINFO, None)
2425
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2426
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2427
    if not test:
2428
      try:
2429
        nimg.mfree = int(hv_info["memory_free"])
2430
      except (ValueError, TypeError):
2431
        _ErrorIf(True, self.ENODERPC, node,
2432
                 "node returned invalid nodeinfo, check hypervisor")
2433

    
2434
    # FIXME: devise a free space model for file based instances as well
2435
    if vg_name is not None:
2436
      test = (constants.NV_VGLIST not in nresult or
2437
              vg_name not in nresult[constants.NV_VGLIST])
2438
      _ErrorIf(test, self.ENODELVM, node,
2439
               "node didn't return data for the volume group '%s'"
2440
               " - it is either missing or broken", vg_name)
2441
      if not test:
2442
        try:
2443
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2444
        except (ValueError, TypeError):
2445
          _ErrorIf(True, self.ENODERPC, node,
2446
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
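
    # Illustrative example of the returned mapping (editorial, not from the
    # original source): a two-disk DRBD instance typically shows up as
    #   {"inst1.example.com": {"node1": [(True, status0), (True, status1)],
    #                          "node2": [(True, status0), (True, status1)]}}
    # where each status is the per-disk payload returned by the
    # blockdev_getmirrorstatus RPC above.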

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure causes
    the output to be logged in the verify output and the verification to fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env
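
    # Illustrative result (editorial, not in the original source): for a
    # cluster tagged "production" with nodes node1 and node2 the hooks
    # environment would look roughly like
    #   {"CLUSTER_TAGS": "production",
    #    "NODE_TAGS_node1": "",
    #    "NODE_TAGS_node2": "rack:r1"}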

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }
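
    # Editorial note (not in the original source): this dictionary is the
    # request sent to each node via call_node_verify below; each NV_* key
    # selects a check for the node to run and the value carries that check's
    # arguments (None meaning the check needs no extra arguments).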
2642

    
2643
    if vg_name is not None:
2644
      node_verify_param[constants.NV_VGLIST] = None
2645
      node_verify_param[constants.NV_LVLIST] = vg_name
2646
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2647
      node_verify_param[constants.NV_DRBDLIST] = None
2648

    
2649
    if drbd_helper:
2650
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2651

    
2652
    # bridge checks
2653
    # FIXME: this needs to be changed per node-group, not cluster-wide
2654
    bridges = set()
2655
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2656
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2657
      bridges.add(default_nicpp[constants.NIC_LINK])
2658
    for instance in self.my_inst_info.values():
2659
      for nic in instance.nics:
2660
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2661
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2662
          bridges.add(full_nic[constants.NIC_LINK])
2663

    
2664
    if bridges:
2665
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2666

    
2667
    # Build our expected cluster state
2668
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2669
                                                 name=node.name,
2670
                                                 vm_capable=node.vm_capable))
2671
                      for node in node_data_list)
2672

    
2673
    # Gather OOB paths
2674
    oob_paths = []
2675
    for node in self.all_node_info.values():
2676
      path = _SupportsOob(self.cfg, node)
2677
      if path and path not in oob_paths:
2678
        oob_paths.append(path)
2679

    
2680
    if oob_paths:
2681
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2682

    
2683
    for instance in self.my_inst_names:
2684
      inst_config = self.my_inst_info[instance]
2685

    
2686
      for nname in inst_config.all_nodes:
2687
        if nname not in node_image:
2688
          gnode = self.NodeImage(name=nname)
2689
          gnode.ghost = (nname not in self.all_node_info)
2690
          node_image[nname] = gnode
2691

    
2692
      inst_config.MapLVsByNode(node_vol_should)
2693

    
2694
      pnode = inst_config.primary_node
2695
      node_image[pnode].pinst.append(instance)
2696

    
2697
      for snode in inst_config.secondary_nodes:
2698
        nimg = node_image[snode]
2699
        nimg.sinst.append(instance)
2700
        if pnode not in nimg.sbp:
2701
          nimg.sbp[pnode] = []
2702
        nimg.sbp[pnode].append(instance)
2703

    
2704
    # At this point, we have the in-memory data structures complete,
2705
    # except for the runtime information, which we'll gather next
2706

    
2707
    # Due to the way our RPC system works, exact response times cannot be
2708
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2709
    # time before and after executing the request, we can at least have a time
2710
    # window.
2711
    nvinfo_starttime = time.time()
2712
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2713
                                           node_verify_param,
2714
                                           self.cfg.GetClusterName())
2715
    nvinfo_endtime = time.time()
2716

    
2717
    if self.extra_lv_nodes and vg_name is not None:
2718
      extra_lv_nvinfo = \
2719
          self.rpc.call_node_verify(self.extra_lv_nodes,
2720
                                    {constants.NV_LVLIST: vg_name},
2721
                                    self.cfg.GetClusterName())
2722
    else:
2723
      extra_lv_nvinfo = {}
2724

    
2725
    all_drbd_map = self.cfg.ComputeDRBDMap()
2726

    
2727
    feedback_fn("* Gathering disk information (%s nodes)" %
2728
                len(self.my_node_names))
2729
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2730
                                     self.my_inst_info)
2731

    
2732
    feedback_fn("* Verifying configuration file consistency")
2733

    
2734
    # If not all nodes are being checked, we need to make sure the master node
2735
    # and a non-checked vm_capable node are in the list.
2736
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2737
    if absent_nodes:
2738
      vf_nvinfo = all_nvinfo.copy()
2739
      vf_node_info = list(self.my_node_info.values())
2740
      additional_nodes = []
2741
      if master_node not in self.my_node_info:
2742
        additional_nodes.append(master_node)
2743
        vf_node_info.append(self.all_node_info[master_node])
2744
      # Add the first vm_capable node we find which is not included
2745
      for node in absent_nodes:
2746
        nodeinfo = self.all_node_info[node]
2747
        if nodeinfo.vm_capable and not nodeinfo.offline:
2748
          additional_nodes.append(node)
2749
          vf_node_info.append(self.all_node_info[node])
2750
          break
2751
      key = constants.NV_FILELIST
2752
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2753
                                                 {key: node_verify_param[key]},
2754
                                                 self.cfg.GetClusterName()))
2755
    else:
2756
      vf_nvinfo = all_nvinfo
2757
      vf_node_info = self.my_node_info.values()
2758

    
2759
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2760

    
2761
    feedback_fn("* Verifying node status")
2762

    
2763
    refos_img = None
2764

    
2765
    for node_i in node_data_list:
2766
      node = node_i.name
2767
      nimg = node_image[node]
2768

    
2769
      if node_i.offline:
2770
        if verbose:
2771
          feedback_fn("* Skipping offline node %s" % (node,))
2772
        n_offline += 1
2773
        continue
2774

    
2775
      if node == master_node:
2776
        ntype = "master"
2777
      elif node_i.master_candidate:
2778
        ntype = "master candidate"
2779
      elif node_i.drained:
2780
        ntype = "drained"
2781
        n_drained += 1
2782
      else:
2783
        ntype = "regular"
2784
      if verbose:
2785
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2786

    
2787
      msg = all_nvinfo[node].fail_msg
2788
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2789
      if msg:
2790
        nimg.rpc_fail = True
2791
        continue
2792

    
2793
      nresult = all_nvinfo[node].payload
2794

    
2795
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2796
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2797
      self._VerifyNodeNetwork(node_i, nresult)
2798
      self._VerifyOob(node_i, nresult)
2799

    
2800
      if nimg.vm_capable:
2801
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2802
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2803
                             all_drbd_map)
2804

    
2805
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2806
        self._UpdateNodeInstances(node_i, nresult, nimg)
2807
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2808
        self._UpdateNodeOS(node_i, nresult, nimg)
2809

    
2810
        if not nimg.os_fail:
2811
          if refos_img is None:
2812
            refos_img = nimg
2813
          self._VerifyNodeOS(node_i, nimg, refos_img)
2814
        self._VerifyNodeBridges(node_i, nresult, bridges)
2815

    
2816
        # Check whether all running instancies are primary for the node. (This
2817
        # can no longer be done from _VerifyInstance below, since some of the
2818
        # wrong instances could be from other node groups.)
2819
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2820

    
2821
        for inst in non_primary_inst:
2822
          test = inst in self.all_inst_info
2823
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2824
                   "instance should not run on node %s", node_i.name)
2825
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2826
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2926

    
2927
    feedback_fn("* Other Notes")
2928
    if i_non_redundant:
2929
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2930
                  % len(i_non_redundant))
2931

    
2932
    if i_non_a_balanced:
2933
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2934
                  % len(i_non_a_balanced))
2935

    
2936
    if n_offline:
2937
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2938

    
2939
    if n_drained:
2940
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2941

    
2942
    return not self.bad
2943

    
2944
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
2959
    # We only really run POST phase hooks, only for non-empty groups,
2960
    # and are only interested in their results
2961
    if not self.my_node_names:
2962
      # empty node group
2963
      pass
2964
    elif phase == constants.HOOKS_PHASE_POST:
2965
      # Used to change hooks' output to proper indentation
2966
      feedback_fn("* Hooks Results")
2967
      assert hooks_results, "invalid result from hooks"
2968

    
2969
      for node_name in hooks_results:
2970
        res = hooks_results[node_name]
2971
        msg = res.fail_msg
2972
        test = msg and not res.offline
2973
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2974
                      "Communication failure in hooks execution: %s", msg)
2975
        if res.offline or msg:
2976
          # No need to investigate payload if node is offline or gave an error.
2977
          # override manually lu_result here as _ErrorIf only
2978
          # overrides self.bad
2979
          lu_result = 1
2980
          continue
2981
        for script, hkr, output in res.payload:
2982
          test = hkr == constants.HKR_FAIL
2983
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2984
                        "Script %s failed, output:", script)
2985
          if test:
2986
            output = self._HOOKS_INDENT_RE.sub("      ", output)
2987
            feedback_fn("%s" % output)
2988
            lu_result = 0
2989

    
2990
    return lu_result
2991

    
2992

    
2993
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
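    # With two node groups named, say, "default" and "rack1" (hypothetical
    # names), the ResultWithJobs above wraps a jobs list of the form
    #   [[opcodes.OpGroupVerifyDisks(group_name="default")],
    #    [opcodes.OpGroupVerifyDisks(group_name="rack1")]]
    # i.e. one single-opcode job per node group.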
3011

    
3012

    
3013
class LUGroupVerifyDisks(NoHooksLU):
3014
  """Verifies the status of all disks in a node group.
3015

3016
  """
3017
  REQ_BGL = False
3018

    
3019
  def ExpandNames(self):
3020
    # Raises errors.OpPrereqError on its own if group can't be found
3021
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3022

    
3023
    self.share_locks = _ShareAll()
3024
    self.needed_locks = {
3025
      locking.LEVEL_INSTANCE: [],
3026
      locking.LEVEL_NODEGROUP: [],
3027
      locking.LEVEL_NODE: [],
3028
      }
3029

    
3030
  def DeclareLocks(self, level):
3031
    if level == locking.LEVEL_INSTANCE:
3032
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3033

    
3034
      # Lock instances optimistically, needs verification once node and group
3035
      # locks have been acquired
3036
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3037
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3038

    
3039
    elif level == locking.LEVEL_NODEGROUP:
3040
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3041

    
3042
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3043
        set([self.group_uuid] +
3044
            # Lock all groups used by instances optimistically; this requires
3045
            # going via the node before it's locked, requiring verification
3046
            # later on
3047
            [group_uuid
3048
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3049
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3050

    
3051
    elif level == locking.LEVEL_NODE:
3052
      # This will only lock the nodes in the group to be verified which contain
3053
      # actual instances
3054
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3055
      self._LockInstancesNodes()
3056

    
3057
      # Lock all nodes in group to be verified
3058
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3059
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3060
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3061

    
3062
  def CheckPrereq(self):
3063
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3064
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3065
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3066

    
3067
    assert self.group_uuid in owned_groups
3068

    
3069
    # Check if locked instances are still correct
3070
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3071

    
3072
    # Get instance information
3073
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3074

    
3075
    # Check if node groups for locked instances are still correct
3076
    for (instance_name, inst) in self.instances.items():
3077
      assert owned_nodes.issuperset(inst.all_nodes), \
3078
        "Instance %s's nodes changed while we kept the lock" % instance_name
3079

    
3080
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3081
                                             owned_groups)
3082

    
3083
      assert self.group_uuid in inst_groups, \
3084
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3085

    
3086
  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
3095
    res_nodes = {}
3096
    res_instances = set()
3097
    res_missing = {}
3098

    
3099
    nv_dict = _MapInstanceDisksToNodes([inst
3100
                                        for inst in self.instances.values()
3101
                                        if inst.admin_up])
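    # nv_dict maps (node name, volume name) to the owning instance name,
    # e.g. (hypothetical values):
    #   {("node1.example.com", "xenvg/disk0_data"): "instance1.example.com"}
    # Entries are popped below as nodes report their LVs; whatever remains
    # afterwards is a missing volume.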
3102

    
3103
    if nv_dict:
3104
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3105
                             set(self.cfg.GetVmCapableNodeList()))
3106

    
3107
      node_lvs = self.rpc.call_lv_list(nodes, [])
3108

    
3109
      for (node, node_res) in node_lvs.items():
3110
        if node_res.offline:
3111
          continue
3112

    
3113
        msg = node_res.fail_msg
3114
        if msg:
3115
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3116
          res_nodes[node] = msg
3117
          continue
3118

    
3119
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3120
          inst = nv_dict.pop((node, lv_name), None)
3121
          if not (lv_online or inst is None):
3122
            res_instances.add(inst)
3123

    
3124
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3125
      # better
3126
      for key, inst in nv_dict.iteritems():
3127
        res_missing.setdefault(inst, []).append(key)
3128

    
3129
    return (res_nodes, list(res_instances), res_missing)
3130

    
3131

    
3132
class LUClusterRepairDiskSizes(NoHooksLU):
3133
  """Verifies the cluster disks sizes.
3134

3135
  """
3136
  REQ_BGL = False
3137

    
3138
  def ExpandNames(self):
3139
    if self.op.instances:
3140
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3141
      self.needed_locks = {
3142
        locking.LEVEL_NODE: [],
3143
        locking.LEVEL_INSTANCE: self.wanted_names,
3144
        }
3145
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3146
    else:
3147
      self.wanted_names = None
3148
      self.needed_locks = {
3149
        locking.LEVEL_NODE: locking.ALL_SET,
3150
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3151
        }
3152
    self.share_locks = _ShareAll()
3153

    
3154
  def DeclareLocks(self, level):
3155
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3156
      self._LockInstancesNodes(primary_only=True)
3157

    
3158
  def CheckPrereq(self):
3159
    """Check prerequisites.
3160

3161
    This only checks the optional instance list against the existing names.
3162

3163
    """
3164
    if self.wanted_names is None:
3165
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3166

    
3167
    self.wanted_instances = \
3168
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3169

    
3170
  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
3179
    if disk.dev_type == constants.LD_DRBD8:
3180
      assert disk.children, "Empty children for DRBD8?"
3181
      fchild = disk.children[0]
3182
      mismatch = fchild.size < disk.size
3183
      if mismatch:
3184
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3185
                     fchild.size, disk.size)
3186
        fchild.size = disk.size
3187

    
3188
      # and we recurse on this child only, not on the metadev
3189
      return self._EnsureChildSizes(fchild) or mismatch
3190
    else:
3191
      return False
3192

    
3193
  def Exec(self, feedback_fn):
3194
    """Verify the size of cluster disks.
3195

3196
    """
3197
    # TODO: check child disks too
3198
    # TODO: check differences in size between primary/secondary nodes
3199
    per_node_disks = {}
3200
    for instance in self.wanted_instances:
3201
      pnode = instance.primary_node
3202
      if pnode not in per_node_disks:
3203
        per_node_disks[pnode] = []
3204
      for idx, disk in enumerate(instance.disks):
3205
        per_node_disks[pnode].append((instance, idx, disk))
3206

    
3207
    changed = []
3208
    for node, dskl in per_node_disks.items():
3209
      newl = [v[2].Copy() for v in dskl]
3210
      for dsk in newl:
3211
        self.cfg.SetDiskID(dsk, node)
3212
      result = self.rpc.call_blockdev_getsize(node, newl)
3213
      if result.fail_msg:
3214
        self.LogWarning("Failure in blockdev_getsize call to node"
3215
                        " %s, ignoring", node)
3216
        continue
3217
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
3220
        self.LogWarning("Invalid result from node %s, ignoring node results",
3221
                        node)
3222
        continue
3223
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3224
        if size is None:
3225
          self.LogWarning("Disk %d of instance %s did not return size"
3226
                          " information, ignoring", idx, instance.name)
3227
          continue
3228
        if not isinstance(size, (int, long)):
3229
          self.LogWarning("Disk %d of instance %s did not return valid"
3230
                          " size information, ignoring", idx, instance.name)
3231
          continue
3232
        size = size >> 20
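        # blockdev_getsize reports the size in bytes; shifting by 20 converts
        # it to MiB, the unit used for disk.size
        # (e.g. 10737418240 >> 20 == 10240).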
3233
        if size != disk.size:
3234
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3235
                       " correcting: recorded %d, actual %d", idx,
3236
                       instance.name, disk.size, size)
3237
          disk.size = size
3238
          self.cfg.Update(instance, feedback_fn)
3239
          changed.append((instance.name, idx, size))
3240
        if self._EnsureChildSizes(disk):
3241
          self.cfg.Update(instance, feedback_fn)
3242
          changed.append((instance.name, idx, disk.size))
3243
    return changed
3244

    
3245

    
3246
class LUClusterRename(LogicalUnit):
3247
  """Rename the cluster.
3248

3249
  """
3250
  HPATH = "cluster-rename"
3251
  HTYPE = constants.HTYPE_CLUSTER
3252

    
3253
  def BuildHooksEnv(self):
3254
    """Build hooks env.
3255

3256
    """
3257
    return {
3258
      "OP_TARGET": self.cfg.GetClusterName(),
3259
      "NEW_NAME": self.op.name,
3260
      }
3261

    
3262
  def BuildHooksNodes(self):
3263
    """Build hooks nodes.
3264

3265
    """
3266
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3267

    
3268
  def CheckPrereq(self):
3269
    """Verify that the passed name is a valid one.
3270

3271
    """
3272
    hostname = netutils.GetHostname(name=self.op.name,
3273
                                    family=self.cfg.GetPrimaryIPFamily())
3274

    
3275
    new_name = hostname.name
3276
    self.ip = new_ip = hostname.ip
3277
    old_name = self.cfg.GetClusterName()
3278
    old_ip = self.cfg.GetMasterIP()
3279
    if new_name == old_name and new_ip == old_ip:
3280
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3281
                                 " cluster has changed",
3282
                                 errors.ECODE_INVAL)
3283
    if new_ip != old_ip:
3284
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3285
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3286
                                   " reachable on the network" %
3287
                                   new_ip, errors.ECODE_NOTUNIQUE)
3288

    
3289
    self.op.name = new_name
3290

    
3291
  def Exec(self, feedback_fn):
3292
    """Rename the cluster.
3293

3294
    """
3295
    clustername = self.op.name
3296
    ip = self.ip
3297

    
3298
    # shutdown the master IP
3299
    master = self.cfg.GetMasterNode()
3300
    result = self.rpc.call_node_deactivate_master_ip(master)
3301
    result.Raise("Could not disable the master role")
3302

    
3303
    try:
3304
      cluster = self.cfg.GetClusterInfo()
3305
      cluster.cluster_name = clustername
3306
      cluster.master_ip = ip
3307
      self.cfg.Update(cluster, feedback_fn)
3308

    
3309
      # update the known hosts file
3310
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3311
      node_list = self.cfg.GetOnlineNodeList()
3312
      try:
3313
        node_list.remove(master)
3314
      except ValueError:
3315
        pass
3316
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3317
    finally:
3318
      result = self.rpc.call_node_activate_master_ip(master)
3319
      msg = result.fail_msg
3320
      if msg:
3321
        self.LogWarning("Could not re-enable the master role on"
3322
                        " the master, please restart manually: %s", msg)
3323

    
3324
    return clustername
3325

    
3326

    
3327
class LUClusterSetParams(LogicalUnit):
3328
  """Change the parameters of the cluster.
3329

3330
  """
3331
  HPATH = "cluster-modify"
3332
  HTYPE = constants.HTYPE_CLUSTER
3333
  REQ_BGL = False
3334

    
3335
  def CheckArguments(self):
3336
    """Check parameters
3337

3338
    """
3339
    if self.op.uid_pool:
3340
      uidpool.CheckUidPool(self.op.uid_pool)
3341

    
3342
    if self.op.add_uids:
3343
      uidpool.CheckUidPool(self.op.add_uids)
3344

    
3345
    if self.op.remove_uids:
3346
      uidpool.CheckUidPool(self.op.remove_uids)
3347

    
3348
  def ExpandNames(self):
3349
    # FIXME: in the future maybe other cluster params won't require checking on
3350
    # all nodes to be modified.
3351
    self.needed_locks = {
3352
      locking.LEVEL_NODE: locking.ALL_SET,
3353
    }
3354
    self.share_locks[locking.LEVEL_NODE] = 1
3355

    
3356
  def BuildHooksEnv(self):
3357
    """Build hooks env.
3358

3359
    """
3360
    return {
3361
      "OP_TARGET": self.cfg.GetClusterName(),
3362
      "NEW_VG_NAME": self.op.vg_name,
3363
      }
3364

    
3365
  def BuildHooksNodes(self):
3366
    """Build hooks nodes.
3367

3368
    """
3369
    mn = self.cfg.GetMasterNode()
3370
    return ([mn], [mn])
3371

    
3372
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given parameters don't conflict and
    that the given volume group is valid.

    """
3379
    if self.op.vg_name is not None and not self.op.vg_name:
3380
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3381
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3382
                                   " instances exist", errors.ECODE_INVAL)
3383

    
3384
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3385
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3386
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3387
                                   " drbd-based instances exist",
3388
                                   errors.ECODE_INVAL)
3389

    
3390
    node_list = self.owned_locks(locking.LEVEL_NODE)
3391

    
3392
    # if vg_name not None, checks given volume group on all nodes
3393
    if self.op.vg_name:
3394
      vglist = self.rpc.call_vg_list(node_list)
3395
      for node in node_list:
3396
        msg = vglist[node].fail_msg
3397
        if msg:
3398
          # ignoring down node
3399
          self.LogWarning("Error while gathering data on node %s"
3400
                          " (ignoring node): %s", node, msg)
3401
          continue
3402
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3403
                                              self.op.vg_name,
3404
                                              constants.MIN_VG_SIZE)
3405
        if vgstatus:
3406
          raise errors.OpPrereqError("Error on node '%s': %s" %
3407
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3408

    
3409
    if self.op.drbd_helper:
3410
      # checks given drbd helper on all nodes
3411
      helpers = self.rpc.call_drbd_helper(node_list)
3412
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3413
        if ninfo.offline:
3414
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3415
          continue
3416
        msg = helpers[node].fail_msg
3417
        if msg:
3418
          raise errors.OpPrereqError("Error checking drbd helper on node"
3419
                                     " '%s': %s" % (node, msg),
3420
                                     errors.ECODE_ENVIRON)
3421
        node_helper = helpers[node].payload
3422
        if node_helper != self.op.drbd_helper:
3423
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3424
                                     (node, node_helper), errors.ECODE_ENVIRON)
3425

    
3426
    self.cluster = cluster = self.cfg.GetClusterInfo()
3427
    # validate params changes
3428
    if self.op.beparams:
3429
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3430
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3431

    
3432
    if self.op.ndparams:
3433
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3434
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3435

    
3436
      # TODO: we need a more general way to handle resetting
3437
      # cluster-level parameters to default values
3438
      if self.new_ndparams["oob_program"] == "":
3439
        self.new_ndparams["oob_program"] = \
3440
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3441

    
3442
    if self.op.nicparams:
3443
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3444
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3445
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3446
      nic_errors = []
3447

    
3448
      # check all instances for consistency
3449
      for instance in self.cfg.GetAllInstancesInfo().values():
3450
        for nic_idx, nic in enumerate(instance.nics):
3451
          params_copy = copy.deepcopy(nic.nicparams)
3452
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3453

    
3454
          # check parameter syntax
3455
          try:
3456
            objects.NIC.CheckParameterSyntax(params_filled)
3457
          except errors.ConfigurationError, err:
3458
            nic_errors.append("Instance %s, nic/%d: %s" %
3459
                              (instance.name, nic_idx, err))
3460

    
3461
          # if we're moving instances to routed, check that they have an ip
3462
          target_mode = params_filled[constants.NIC_MODE]
3463
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3464
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3465
                              " address" % (instance.name, nic_idx))
3466
      if nic_errors:
3467
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3468
                                   "\n".join(nic_errors))
3469

    
3470
    # hypervisor list/parameters
3471
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3472
    if self.op.hvparams:
3473
      for hv_name, hv_dict in self.op.hvparams.items():
3474
        if hv_name not in self.new_hvparams:
3475
          self.new_hvparams[hv_name] = hv_dict
3476
        else:
3477
          self.new_hvparams[hv_name].update(hv_dict)
3478

    
3479
    # os hypervisor parameters
3480
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3481
    if self.op.os_hvp:
3482
      for os_name, hvs in self.op.os_hvp.items():
3483
        if os_name not in self.new_os_hvp:
3484
          self.new_os_hvp[os_name] = hvs
3485
        else:
3486
          for hv_name, hv_dict in hvs.items():
3487
            if hv_name not in self.new_os_hvp[os_name]:
3488
              self.new_os_hvp[os_name][hv_name] = hv_dict
3489
            else:
3490
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3491

    
3492
    # os parameters
3493
    self.new_osp = objects.FillDict(cluster.osparams, {})
3494
    if self.op.osparams:
3495
      for os_name, osp in self.op.osparams.items():
3496
        if os_name not in self.new_osp:
3497
          self.new_osp[os_name] = {}
3498

    
3499
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3500
                                                  use_none=True)
3501

    
3502
        if not self.new_osp[os_name]:
3503
          # we removed all parameters
3504
          del self.new_osp[os_name]
3505
        else:
3506
          # check the parameter validity (remote check)
3507
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3508
                         os_name, self.new_osp[os_name])
3509

    
3510
    # changes to the hypervisor list
3511
    if self.op.enabled_hypervisors is not None:
3512
      self.hv_list = self.op.enabled_hypervisors
3513
      for hv in self.hv_list:
3514
        # if the hypervisor doesn't already exist in the cluster
3515
        # hvparams, we initialize it to empty, and then (in both
3516
        # cases) we make sure to fill the defaults, as we might not
3517
        # have a complete defaults list if the hypervisor wasn't
3518
        # enabled before
3519
        if hv not in new_hvp:
3520
          new_hvp[hv] = {}
3521
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3522
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3523
    else:
3524
      self.hv_list = cluster.enabled_hypervisors
3525

    
3526
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3527
      # either the enabled list has changed, or the parameters have, validate
3528
      for hv_name, hv_params in self.new_hvparams.items():
3529
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3530
            (self.op.enabled_hypervisors and
3531
             hv_name in self.op.enabled_hypervisors)):
3532
          # either this is a new hypervisor, or its parameters have changed
3533
          hv_class = hypervisor.GetHypervisor(hv_name)
3534
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3535
          hv_class.CheckParameterSyntax(hv_params)
3536
          _CheckHVParams(self, node_list, hv_name, hv_params)
3537

    
3538
    if self.op.os_hvp:
3539
      # no need to check any newly-enabled hypervisors, since the
3540
      # defaults have already been checked in the above code-block
3541
      for os_name, os_hvp in self.new_os_hvp.items():
3542
        for hv_name, hv_params in os_hvp.items():
3543
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3544
          # we need to fill in the new os_hvp on top of the actual hv_p
3545
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3546
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3547
          hv_class = hypervisor.GetHypervisor(hv_name)
3548
          hv_class.CheckParameterSyntax(new_osp)
3549
          _CheckHVParams(self, node_list, hv_name, new_osp)
3550

    
3551
    if self.op.default_iallocator:
3552
      alloc_script = utils.FindFile(self.op.default_iallocator,
3553
                                    constants.IALLOCATOR_SEARCH_PATH,
3554
                                    os.path.isfile)
3555
      if alloc_script is None:
3556
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3557
                                   " specified" % self.op.default_iallocator,
3558
                                   errors.ECODE_INVAL)
3559

    
3560
  def Exec(self, feedback_fn):
3561
    """Change the parameters of the cluster.
3562

3563
    """
3564
    if self.op.vg_name is not None:
3565
      new_volume = self.op.vg_name
3566
      if not new_volume:
3567
        new_volume = None
3568
      if new_volume != self.cfg.GetVGName():
3569
        self.cfg.SetVGName(new_volume)
3570
      else:
3571
        feedback_fn("Cluster LVM configuration already in desired"
3572
                    " state, not changing")
3573
    if self.op.drbd_helper is not None:
3574
      new_helper = self.op.drbd_helper
3575
      if not new_helper:
3576
        new_helper = None
3577
      if new_helper != self.cfg.GetDRBDHelper():
3578
        self.cfg.SetDRBDHelper(new_helper)
3579
      else:
3580
        feedback_fn("Cluster DRBD helper already in desired state,"
3581
                    " not changing")
3582
    if self.op.hvparams:
3583
      self.cluster.hvparams = self.new_hvparams
3584
    if self.op.os_hvp:
3585
      self.cluster.os_hvp = self.new_os_hvp
3586
    if self.op.enabled_hypervisors is not None:
3587
      self.cluster.hvparams = self.new_hvparams
3588
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3589
    if self.op.beparams:
3590
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3591
    if self.op.nicparams:
3592
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3593
    if self.op.osparams:
3594
      self.cluster.osparams = self.new_osp
3595
    if self.op.ndparams:
3596
      self.cluster.ndparams = self.new_ndparams
3597

    
3598
    if self.op.candidate_pool_size is not None:
3599
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3600
      # we need to update the pool size here, otherwise the save will fail
3601
      _AdjustCandidatePool(self, [])
3602

    
3603
    if self.op.maintain_node_health is not None:
3604
      self.cluster.maintain_node_health = self.op.maintain_node_health
3605

    
3606
    if self.op.prealloc_wipe_disks is not None:
3607
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3608

    
3609
    if self.op.add_uids is not None:
3610
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3611

    
3612
    if self.op.remove_uids is not None:
3613
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3614

    
3615
    if self.op.uid_pool is not None:
3616
      self.cluster.uid_pool = self.op.uid_pool
3617

    
3618
    if self.op.default_iallocator is not None:
3619
      self.cluster.default_iallocator = self.op.default_iallocator
3620

    
3621
    if self.op.reserved_lvs is not None:
3622
      self.cluster.reserved_lvs = self.op.reserved_lvs
3623

    
3624
    def helper_os(aname, mods, desc):
3625
      desc += " OS list"
3626
      lst = getattr(self.cluster, aname)
3627
      for key, val in mods:
3628
        if key == constants.DDM_ADD:
3629
          if val in lst:
3630
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3631
          else:
3632
            lst.append(val)
3633
        elif key == constants.DDM_REMOVE:
3634
          if val in lst:
3635
            lst.remove(val)
3636
          else:
3637
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3638
        else:
3639
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
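    # helper_os consumes (action, value) pairs; e.g. (hypothetical OS names)
    #   helper_os("hidden_os",
    #             [(constants.DDM_ADD, "debian-image"),
    #              (constants.DDM_REMOVE, "lenny-image")],
    #             "hidden")
    # adds "debian-image" to and removes "lenny-image" from the hidden OS
    # list, reporting no-op additions/removals via feedback_fn.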
3640

    
3641
    if self.op.hidden_os:
3642
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3643

    
3644
    if self.op.blacklisted_os:
3645
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3646

    
3647
    if self.op.master_netdev:
3648
      master = self.cfg.GetMasterNode()
3649
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3650
                  self.cluster.master_netdev)
3651
      result = self.rpc.call_node_deactivate_master_ip(master)
3652
      result.Raise("Could not disable the master ip")
3653
      feedback_fn("Changing master_netdev from %s to %s" %
3654
                  (self.cluster.master_netdev, self.op.master_netdev))
3655
      self.cluster.master_netdev = self.op.master_netdev
3656

    
3657
    self.cfg.Update(self.cluster, feedback_fn)
3658

    
3659
    if self.op.master_netdev:
3660
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3661
                  self.op.master_netdev)
3662
      result = self.rpc.call_node_activate_master_ip(master)
3663
      if result.fail_msg:
3664
        self.LogWarning("Could not re-enable the master ip on"
3665
                        " the master, please restart manually: %s",
3666
                        result.fail_msg)
3667

    
3668

    
3669
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
3681

    
3682

    
3683
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
3690
  # Compute files for all nodes
3691
  files_all = set([
3692
    constants.SSH_KNOWN_HOSTS_FILE,
3693
    constants.CONFD_HMAC_KEY,
3694
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3695
    ])
3696

    
3697
  if not redist:
3698
    files_all.update(constants.ALL_CERT_FILES)
3699
    files_all.update(ssconf.SimpleStore().GetFileList())
3700

    
3701
  if cluster.modify_etc_hosts:
3702
    files_all.add(constants.ETC_HOSTS)
3703

    
3704
  # Files which must either exist on all nodes or on none
3705
  files_all_opt = set([
3706
    constants.RAPI_USERS_FILE,
3707
    ])
3708

    
3709
  # Files which should only be on master candidates
3710
  files_mc = set()
3711
  if not redist:
3712
    files_mc.add(constants.CLUSTER_CONF_FILE)
3713

    
3714
  # Files which should only be on VM-capable nodes
3715
  files_vm = set(filename
3716
    for hv_name in cluster.enabled_hypervisors
3717
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3718

    
3719
  # Filenames must be unique
3720
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3721
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3722
         "Found file listed in more than one file list"
3723

    
3724
  return (files_all, files_all_opt, files_mc, files_vm)
3725

    
3726

    
3727
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
3740
  # Gather target nodes
3741
  cluster = lu.cfg.GetClusterInfo()
3742
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3743

    
3744
  online_nodes = lu.cfg.GetOnlineNodeList()
3745
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3746

    
3747
  if additional_nodes is not None:
3748
    online_nodes.extend(additional_nodes)
3749
    if additional_vm:
3750
      vm_nodes.extend(additional_nodes)
3751

    
3752
  # Never distribute to master node
3753
  for nodelist in [online_nodes, vm_nodes]:
3754
    if master_info.name in nodelist:
3755
      nodelist.remove(master_info.name)
3756

    
3757
  # Gather file lists
3758
  (files_all, files_all_opt, files_mc, files_vm) = \
3759
    _ComputeAncillaryFiles(cluster, True)
3760

    
3761
  # Never re-distribute configuration file from here
3762
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3763
              constants.CLUSTER_CONF_FILE in files_vm)
3764
  assert not files_mc, "Master candidates not handled in this function"
3765

    
3766
  filemap = [
3767
    (online_nodes, files_all),
3768
    (online_nodes, files_all_opt),
3769
    (vm_nodes, files_vm),
3770
    ]
3771

    
3772
  # Upload the files
3773
  for (node_list, files) in filemap:
3774
    for fname in files:
3775
      _UploadHelper(lu, node_list, fname)
3776

    
3777

    
3778
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
3798

    
3799

    
3800
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_activate_master_ip(master)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_deactivate_master_ip(master)
3822

    
3823

    
3824
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.

  """
3828
  if not instance.disks or disks is not None and not disks:
3829
    return True
3830

    
3831
  disks = _ExpandCheckDisks(instance, disks)
3832

    
3833
  if not oneshot:
3834
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3835

    
3836
  node = instance.primary_node
3837

    
3838
  for dev in disks:
3839
    lu.cfg.SetDiskID(dev, node)
3840

    
3841
  # TODO: Convert to utils.Retry
3842

    
3843
  retries = 0
3844
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3845
  while True:
3846
    max_time = 0
3847
    done = True
3848
    cumul_degraded = False
3849
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3850
    msg = rstats.fail_msg
3851
    if msg:
3852
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3853
      retries += 1
3854
      if retries >= 10:
3855
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3856
                                 " aborting." % node)
3857
      time.sleep(6)
3858
      continue
3859
    rstats = rstats.payload
3860
    retries = 0
3861
    for i, mstat in enumerate(rstats):
3862
      if mstat is None:
3863
        lu.LogWarning("Can't compute data for node %s/%s",
3864
                           node, disks[i].iv_name)
3865
        continue
3866

    
3867
      cumul_degraded = (cumul_degraded or
3868
                        (mstat.is_degraded and mstat.sync_percent is None))
3869
      if mstat.sync_percent is not None:
3870
        done = False
3871
        if mstat.estimated_time is not None:
3872
          rem_time = ("%s remaining (estimated)" %
3873
                      utils.FormatSeconds(mstat.estimated_time))
3874
          max_time = mstat.estimated_time
3875
        else:
3876
          rem_time = "no time estimate"
3877
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3878
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3879

    
3880
    # if we're done but degraded, let's do a few small retries, to
3881
    # make sure we see a stable and not transient situation; therefore
3882
    # we force restart of the loop
3883
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3884
      logging.info("Degraded disks found, %d retries left", degr_retries)
3885
      degr_retries -= 1
3886
      time.sleep(1)
3887
      continue
3888

    
3889
    if done or oneshot:
3890
      break
3891

    
3892
    time.sleep(min(60, max_time))
3893

    
3894
  if done:
3895
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3896
  return not cumul_degraded
3897

    
3898

    
3899
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
3907
  lu.cfg.SetDiskID(dev, node)
3908

    
3909
  result = True
3910

    
3911
  if on_primary or dev.AssembleOnSecondary():
3912
    rstats = lu.rpc.call_blockdev_find(node, dev)
3913
    msg = rstats.fail_msg
3914
    if msg:
3915
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3916
      result = False
3917
    elif not rstats.payload:
3918
      lu.LogWarning("Can't find disk on node %s", node)
3919
      result = False
3920
    else:
3921
      if ldisk:
3922
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3923
      else:
3924
        result = result and not rstats.payload.is_degraded
3925

    
3926
  if dev.children:
3927
    for child in dev.children:
3928
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3929

    
3930
  return result
3931

    
3932

    
3933
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3939

    
3940
  def ExpandNames(self):
3941
    """Gather locks we need.
3942

3943
    """
3944
    if self.op.node_names:
3945
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3946
      lock_names = self.op.node_names
3947
    else:
3948
      lock_names = locking.ALL_SET
3949

    
3950
    self.needed_locks = {
3951
      locking.LEVEL_NODE: lock_names,
3952
      }
3953

    
3954
  def CheckPrereq(self):
3955
    """Check prerequisites.
3956

3957
    This checks:
3958
     - the node exists in the configuration
3959
     - OOB is supported
3960

3961
    Any errors are signaled by raising errors.OpPrereqError.
3962

3963
    """
3964
    self.nodes = []
3965
    self.master_node = self.cfg.GetMasterNode()
3966

    
3967
    assert self.op.power_delay >= 0.0
3968

    
3969
    if self.op.node_names:
3970
      if (self.op.command in self._SKIP_MASTER and
3971
          self.master_node in self.op.node_names):
3972
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3973
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3974

    
3975
        if master_oob_handler:
3976
          additional_text = ("run '%s %s %s' if you want to operate on the"
3977
                             " master regardless") % (master_oob_handler,
3978
                                                      self.op.command,
3979
                                                      self.master_node)
3980
        else:
3981
          additional_text = "it does not support out-of-band operations"
3982

    
3983
        raise errors.OpPrereqError(("Operating on the master node %s is not"
3984
                                    " allowed for %s; %s") %
3985
                                   (self.master_node, self.op.command,
3986
                                    additional_text), errors.ECODE_INVAL)
3987
    else:
3988
      self.op.node_names = self.cfg.GetNodeList()
3989
      if self.op.command in self._SKIP_MASTER:
3990
        self.op.node_names.remove(self.master_node)
3991

    
3992
    if self.op.command in self._SKIP_MASTER:
3993
      assert self.master_node not in self.op.node_names
3994

    
3995
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3996
      if node is None:
3997
        raise errors.OpPrereqError("Node %s not found" % node_name,
3998
                                   errors.ECODE_NOENT)
3999
      else:
4000
        self.nodes.append(node)
4001

    
4002
      if (not self.op.ignore_status and
4003
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4004
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4005
                                    " not marked offline") % node_name,
4006
                                   errors.ECODE_STATE)
4007

    
4008
  def Exec(self, feedback_fn):
4009
    """Execute OOB and return result if we expect any.
4010

4011
    """
4012
    master_node = self.master_node
4013
    ret = []
4014

    
4015
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4016
                                              key=lambda node: node.name)):
4017
      node_entry = [(constants.RS_NORMAL, node.name)]
4018
      ret.append(node_entry)
4019

    
4020
      oob_program = _SupportsOob(self.cfg, node)
4021

    
4022
      if not oob_program:
4023
        node_entry.append((constants.RS_UNAVAIL, None))
4024
        continue
4025

    
4026
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4027
                   self.op.command, oob_program, node.name)
4028
      result = self.rpc.call_run_oob(master_node, oob_program,
4029
                                     self.op.command, node.name,
4030
                                     self.op.timeout)
4031

    
4032
      if result.fail_msg:
4033
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4034
                        node.name, result.fail_msg)
4035
        node_entry.append((constants.RS_NODATA, None))
4036
      else:
4037
        try:
4038
          self._CheckPayload(result)
4039
        except errors.OpExecError, err:
4040
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4041
                          node.name, err)
4042
          node_entry.append((constants.RS_NODATA, None))
4043
        else:
4044
          if self.op.command == constants.OOB_HEALTH:
4045
            # For health we should log important events
4046
            for item, status in result.payload:
4047
              if status in [constants.OOB_STATUS_WARNING,
4048
                            constants.OOB_STATUS_CRITICAL]:
4049
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4050
                                item, node.name, status)
4051

    
4052
          if self.op.command == constants.OOB_POWER_ON:
4053
            node.powered = True
4054
          elif self.op.command == constants.OOB_POWER_OFF:
4055
            node.powered = False
4056
          elif self.op.command == constants.OOB_POWER_STATUS:
4057
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4058
            if powered != node.powered:
4059
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4060
                               " match actual power state (%s)"), node.powered,
4061
                              node.name, powered)
4062

    
4063
          # For configuration changing commands we should update the node
4064
          if self.op.command in (constants.OOB_POWER_ON,
4065
                                 constants.OOB_POWER_OFF):
4066
            self.cfg.Update(node, feedback_fn)
4067

    
4068
          node_entry.append((constants.RS_NORMAL, result.payload))
4069

    
4070
          if (self.op.command == constants.OOB_POWER_ON and
4071
              idx < len(self.nodes) - 1):
4072
            time.sleep(self.op.power_delay)
4073

    
4074
    return ret
4075

    
4076
  def _CheckPayload(self, result):
4077
    """Checks if the payload is valid.
4078

4079
    @param result: RPC result
4080
    @raises errors.OpExecError: If payload is not valid
4081

4082
    """
4083
    errs = []
4084
    if self.op.command == constants.OOB_HEALTH:
4085
      if not isinstance(result.payload, list):
4086
        errs.append("command 'health' is expected to return a list but got %s" %
4087
                    type(result.payload))
4088
      else:
4089
        for item, status in result.payload:
4090
          if status not in constants.OOB_STATUSES:
4091
            errs.append("health item '%s' has invalid status '%s'" %
4092
                        (item, status))
4093

    
4094
    if self.op.command == constants.OOB_POWER_STATUS:
4095
      if not isinstance(result.payload, dict):
4096
        errs.append("power-status is expected to return a dict but got %s" %
4097
                    type(result.payload))
4098

    
4099
    if self.op.command in [
4100
        constants.OOB_POWER_ON,
4101
        constants.OOB_POWER_OFF,
4102
        constants.OOB_POWER_CYCLE,
4103
        ]:
4104
      if result.payload is not None:
4105
        errs.append("%s is expected to not return payload but got '%s'" %
4106
                    (self.op.command, result.payload))
4107

    
4108
    if errs:
4109
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4110
                               utils.CommaJoin(errs))
4111

    
4112

    
4113
class _OsQuery(_QueryBase):
4114
  FIELDS = query.OS_FIELDS
4115

    
4116
  def ExpandNames(self, lu):
4117
    # Lock all nodes in shared mode
4118
    # Temporary removal of locks, should be reverted later
4119
    # TODO: reintroduce locks when they are lighter-weight
4120
    lu.needed_locks = {}
4121
    #self.share_locks[locking.LEVEL_NODE] = 1
4122
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4123

    
4124
    # The following variables interact with _QueryBase._GetNames
4125
    if self.names:
4126
      self.wanted = self.names
4127
    else:
4128
      self.wanted = locking.ALL_SET
4129

    
4130
    self.do_locking = self.use_locking
4131

    
4132
  def DeclareLocks(self, lu, level):
4133
    pass
4134

    
4135
  @staticmethod
4136
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
4152
    all_os = {}
4153
    # we build here the list of nodes that didn't fail the RPC (at RPC
4154
    # level), so that nodes with a non-responding node daemon don't
4155
    # make all OSes invalid
4156
    good_nodes = [node_name for node_name in rlist
4157
                  if not rlist[node_name].fail_msg]
4158
    for node_name, nr in rlist.items():
4159
      if nr.fail_msg or not nr.payload:
4160
        continue
4161
      for (name, path, status, diagnose, variants,
4162
           params, api_versions) in nr.payload:
4163
        if name not in all_os:
4164
          # build a list of nodes for this os containing empty lists
4165
          # for each node in node_list
4166
          all_os[name] = {}
4167
          for nname in good_nodes:
4168
            all_os[name][nname] = []
4169
        # convert params from [name, help] to (name, help)
4170
        params = [tuple(v) for v in params]
4171
        all_os[name][node_name].append((path, status, diagnose,
4172
                                        variants, params, api_versions))
4173
    return all_os
4174

    
4175
  def _GetQueryData(self, lu):
4176
    """Computes the list of nodes and their attributes.
4177

4178
    """
4179
    # Locking is not used
4180
    assert not (compat.any(lu.glm.is_owned(level)
4181
                           for level in locking.LEVELS
4182
                           if level != locking.LEVEL_CLUSTER) or
4183
                self.do_locking or self.use_locking)
4184

    
4185
    valid_nodes = [node.name
4186
                   for node in lu.cfg.GetAllNodesInfo().values()
4187
                   if not node.offline and node.vm_capable]
4188
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4189
    cluster = lu.cfg.GetClusterInfo()
4190

    
4191
    data = {}
4192

    
4193
    for (os_name, os_data) in pol.items():
4194
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4195
                          hidden=(os_name in cluster.hidden_os),
4196
                          blacklisted=(os_name in cluster.blacklisted_os))
4197

    
4198
      variants = set()
4199
      parameters = set()
4200
      api_versions = set()
4201

    
4202
      for idx, osl in enumerate(os_data.values()):
4203
        info.valid = bool(info.valid and osl and osl[0][1])
4204
        if not info.valid:
4205
          break
4206

    
4207
        (node_variants, node_params, node_api) = osl[0][3:6]
4208
        if idx == 0:
4209
          # First entry
4210
          variants.update(node_variants)
4211
          parameters.update(node_params)
4212
          api_versions.update(node_api)
4213
        else:
4214
          # Filter out inconsistent values
4215
          variants.intersection_update(node_variants)
4216
          parameters.intersection_update(node_params)
4217
          api_versions.intersection_update(node_api)
4218

    
4219
      info.variants = list(variants)
4220
      info.parameters = list(parameters)
4221
      info.api_versions = list(api_versions)
4222

    
4223
      data[os_name] = info
4224

    
4225
    # Prepare data in requested order
4226
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4227
            if name in data]
4228

    
4229

    
4230
class LUOsDiagnose(NoHooksLU):
4231
  """Logical unit for OS diagnose/query.
4232

4233
  """
4234
  REQ_BGL = False
4235

    
4236
  @staticmethod
4237
  def _BuildFilter(fields, names):
4238
    """Builds a filter for querying OSes.
4239

4240
    """
4241
    name_filter = qlang.MakeSimpleFilter("name", names)
4242

    
4243
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4244
    # respective field is not requested
4245
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4246
                     for fname in ["hidden", "blacklisted"]
4247
                     if fname not in fields]
4248
    if "valid" not in fields:
4249
      status_filter.append([qlang.OP_TRUE, "valid"])
4250

    
4251
    if status_filter:
4252
      status_filter.insert(0, qlang.OP_AND)
4253
    else:
4254
      status_filter = None
4255

    
4256
    if name_filter and status_filter:
4257
      return [qlang.OP_AND, name_filter, status_filter]
4258
    elif name_filter:
4259
      return name_filter
4260
    else:
4261
      return status_filter
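    # Rough sketch of the combined filter for a plain "list OS names" query
    # (the exact nesting of the name part depends on qlang.MakeSimpleFilter;
    # this is an illustration, not the precise wire format):
    #   [OP_AND, <name filter>,
    #    [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
    #             [OP_NOT, [OP_TRUE, "blacklisted"]],
    #             [OP_TRUE, "valid"]]]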
4262

    
4263
  def CheckArguments(self):
4264
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4265
                       self.op.output_fields, False)
4266

    
4267
  def ExpandNames(self):
4268
    self.oq.ExpandNames(self)
4269

    
4270
  def Exec(self, feedback_fn):
4271
    return self.oq.OldStyleQuery(self)
4272

    
4273

    
4274
class LUNodeRemove(LogicalUnit):
4275
  """Logical unit for removing a node.
4276

4277
  """
4278
  HPATH = "node-remove"
4279
  HTYPE = constants.HTYPE_NODE
4280

    
4281
  def BuildHooksEnv(self):
4282
    """Build hooks env.
4283

4284
    This doesn't run on the target node in the pre phase as a failed
4285
    node would then be impossible to remove.
4286

4287
    """
4288
    return {
4289
      "OP_TARGET": self.op.node_name,
4290
      "NODE_NAME": self.op.node_name,
4291
      }
4292

    
4293
  def BuildHooksNodes(self):
4294
    """Build hooks nodes.
4295

4296
    """
4297
    all_nodes = self.cfg.GetNodeList()
4298
    try:
4299
      all_nodes.remove(self.op.node_name)
4300
    except ValueError:
4301
      logging.warning("Node '%s', which is about to be removed, was not found"
4302
                      " in the list of all nodes", self.op.node_name)
4303
    return (all_nodes, all_nodes)
4304

    
4305
  def CheckPrereq(self):
4306
    """Check prerequisites.
4307

4308
    This checks:
4309
     - the node exists in the configuration
4310
     - it does not have primary or secondary instances
4311
     - it's not the master
4312

4313
    Any errors are signaled by raising errors.OpPrereqError.
4314

4315
    """
4316
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4317
    node = self.cfg.GetNodeInfo(self.op.node_name)
4318
    assert node is not None
4319

    
4320
    masternode = self.cfg.GetMasterNode()
4321
    if node.name == masternode:
4322
      raise errors.OpPrereqError("Node is the master node, failover to another"
4323
                                 " node is required", errors.ECODE_INVAL)
4324

    
4325
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4326
      if node.name in instance.all_nodes:
4327
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4328
                                   " please remove first" % instance_name,
4329
                                   errors.ECODE_INVAL)
4330
    self.op.node_name = node.name
4331
    self.node = node
4332

    
4333
  def Exec(self, feedback_fn):
4334
    """Removes the node from the cluster.
4335

4336
    """
4337
    node = self.node
4338
    logging.info("Stopping the node daemon and removing configs from node %s",
4339
                 node.name)
4340

    
4341
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4342

    
4343
    # Promote nodes to master candidate as needed
4344
    _AdjustCandidatePool(self, exceptions=[node.name])
4345
    self.context.RemoveNode(node.name)
4346

    
4347
    # Run post hooks on the node before it's removed
4348
    _RunPostHook(self, node.name)
4349

    
4350
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4351
    msg = result.fail_msg
4352
    if msg:
4353
      self.LogWarning("Errors encountered on the remote node while leaving"
4354
                      " the cluster: %s", msg)
4355

    
4356
    # Remove node from our /etc/hosts
4357
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4358
      master_node = self.cfg.GetMasterNode()
4359
      result = self.rpc.call_etc_hosts_modify(master_node,
4360
                                              constants.ETC_HOSTS_REMOVE,
4361
                                              node.name, None)
4362
      result.Raise("Can't update hosts file with new host data")
4363
      _RedistributeAncillaryFiles(self)
4364

    
4365

    
4366
class _NodeQuery(_QueryBase):
4367
  FIELDS = query.NODE_FIELDS
4368

    
4369
  def ExpandNames(self, lu):
4370
    lu.needed_locks = {}
4371
    lu.share_locks = _ShareAll()
4372

    
4373
    if self.names:
4374
      self.wanted = _GetWantedNodes(lu, self.names)
4375
    else:
4376
      self.wanted = locking.ALL_SET
4377

    
4378
    self.do_locking = (self.use_locking and
4379
                       query.NQ_LIVE in self.requested_data)
4380

    
4381
    if self.do_locking:
4382
      # If any non-static field is requested we need to lock the nodes
4383
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
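    # Net effect of the check above, as an illustration: a query that only
    # asks for configuration-backed fields, or that is run with
    # use_locking=False, leaves do_locking False and therefore takes no node
    # locks at all, answering purely from the master's configuration.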
4384

    
4385
  def DeclareLocks(self, lu, level):
4386
    pass
4387

    
4388
  def _GetQueryData(self, lu):
4389
    """Computes the list of nodes and their attributes.
4390

4391
    """
4392
    all_info = lu.cfg.GetAllNodesInfo()
4393

    
4394
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4395

    
4396
    # Gather data as requested
4397
    if query.NQ_LIVE in self.requested_data:
4398
      # filter out non-vm_capable nodes
4399
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4400

    
4401
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4402
                                        lu.cfg.GetHypervisorType())
4403
      live_data = dict((name, nresult.payload)
4404
                       for (name, nresult) in node_data.items()
4405
                       if not nresult.fail_msg and nresult.payload)
4406
    else:
4407
      live_data = None
4408

    
4409
    if query.NQ_INST in self.requested_data:
4410
      node_to_primary = dict([(name, set()) for name in nodenames])
4411
      node_to_secondary = dict([(name, set()) for name in nodenames])
4412

    
4413
      inst_data = lu.cfg.GetAllInstancesInfo()
4414

    
4415
      for inst in inst_data.values():
4416
        if inst.primary_node in node_to_primary:
4417
          node_to_primary[inst.primary_node].add(inst.name)
4418
        for secnode in inst.secondary_nodes:
4419
          if secnode in node_to_secondary:
4420
            node_to_secondary[secnode].add(inst.name)
4421
    else:
4422
      node_to_primary = None
4423
      node_to_secondary = None
4424

    
4425
    if query.NQ_OOB in self.requested_data:
4426
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4427
                         for name, node in all_info.iteritems())
4428
    else:
4429
      oob_support = None
4430

    
4431
    if query.NQ_GROUP in self.requested_data:
4432
      groups = lu.cfg.GetAllNodeGroupsInfo()
4433
    else:
4434
      groups = {}
4435

    
4436
    return query.NodeQueryData([all_info[name] for name in nodenames],
4437
                               live_data, lu.cfg.GetMasterNode(),
4438
                               node_to_primary, node_to_secondary, groups,
4439
                               oob_support, lu.cfg.GetClusterInfo())
4440

    
4441

    
4442
class LUNodeQuery(NoHooksLU):
4443
  """Logical unit for querying nodes.
4444

4445
  """
4446
  # pylint: disable=W0142
4447
  REQ_BGL = False
4448

    
4449
  def CheckArguments(self):
4450
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4451
                         self.op.output_fields, self.op.use_locking)
4452

    
4453
  def ExpandNames(self):
4454
    self.nq.ExpandNames(self)
4455

    
4456
  def Exec(self, feedback_fn):
4457
    return self.nq.OldStyleQuery(self)
4458

    
4459

    
4460
class LUNodeQueryvols(NoHooksLU):
4461
  """Logical unit for getting volumes on node(s).
4462

4463
  """
4464
  REQ_BGL = False
4465
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4466
  _FIELDS_STATIC = utils.FieldSet("node")
4467

    
4468
  def CheckArguments(self):
4469
    _CheckOutputFields(static=self._FIELDS_STATIC,
4470
                       dynamic=self._FIELDS_DYNAMIC,
4471
                       selected=self.op.output_fields)
4472

    
4473
  def ExpandNames(self):
4474
    self.needed_locks = {}
4475
    self.share_locks[locking.LEVEL_NODE] = 1
4476
    if not self.op.nodes:
4477
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4478
    else:
4479
      self.needed_locks[locking.LEVEL_NODE] = \
4480
        _GetWantedNodes(self, self.op.nodes)
4481

    
4482
  def Exec(self, feedback_fn):
4483
    """Computes the list of nodes and their attributes.
4484

4485
    """
4486
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4487
    volumes = self.rpc.call_node_volumes(nodenames)
4488

    
4489
    ilist = self.cfg.GetAllInstancesInfo()
4490
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4491

    
4492
    output = []
4493
    for node in nodenames:
4494
      nresult = volumes[node]
4495
      if nresult.offline:
4496
        continue
4497
      msg = nresult.fail_msg
4498
      if msg:
4499
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4500
        continue
4501

    
4502
      node_vols = sorted(nresult.payload,
4503
                         key=operator.itemgetter("dev"))
4504

    
4505
      for vol in node_vols:
4506
        node_output = []
4507
        for field in self.op.output_fields:
4508
          if field == "node":
4509
            val = node
4510
          elif field == "phys":
4511
            val = vol["dev"]
4512
          elif field == "vg":
4513
            val = vol["vg"]
4514
          elif field == "name":
4515
            val = vol["name"]
4516
          elif field == "size":
4517
            val = int(float(vol["size"]))
4518
          elif field == "instance":
4519
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4520
          else:
4521
            raise errors.ParameterError(field)
4522
          node_output.append(str(val))
4523

    
4524
        output.append(node_output)
4525

    
4526
    return output
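    # Example of the returned rows (values illustrative only), assuming
    # output_fields of ["node", "name", "size", "instance"]:
    #   [["node1.example.com", "disk0", "10240", "instance1.example.com"],
    #    ["node1.example.com", "disk1", "2048", "-"]]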
4527

    
4528

    
4529
class LUNodeQueryStorage(NoHooksLU):
4530
  """Logical unit for getting information on storage units on node(s).
4531

4532
  """
4533
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4534
  REQ_BGL = False
4535

    
4536
  def CheckArguments(self):
4537
    _CheckOutputFields(static=self._FIELDS_STATIC,
4538
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4539
                       selected=self.op.output_fields)
4540

    
4541
  def ExpandNames(self):
4542
    self.needed_locks = {}
4543
    self.share_locks[locking.LEVEL_NODE] = 1
4544

    
4545
    if self.op.nodes:
4546
      self.needed_locks[locking.LEVEL_NODE] = \
4547
        _GetWantedNodes(self, self.op.nodes)
4548
    else:
4549
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4550

    
4551
  def Exec(self, feedback_fn):
4552
    """Computes the list of nodes and their attributes.
4553

4554
    """
4555
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4556

    
4557
    # Always get name to sort by
4558
    if constants.SF_NAME in self.op.output_fields:
4559
      fields = self.op.output_fields[:]
4560
    else:
4561
      fields = [constants.SF_NAME] + self.op.output_fields
4562

    
4563
    # Never ask for node or type as it's only known to the LU
4564
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4565
      while extra in fields:
4566
        fields.remove(extra)
4567

    
4568
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4569
    name_idx = field_idx[constants.SF_NAME]
4570

    
4571
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4572
    data = self.rpc.call_storage_list(self.nodes,
4573
                                      self.op.storage_type, st_args,
4574
                                      self.op.name, fields)
4575

    
4576
    result = []
4577

    
4578
    for node in utils.NiceSort(self.nodes):
4579
      nresult = data[node]
4580
      if nresult.offline:
4581
        continue
4582

    
4583
      msg = nresult.fail_msg
4584
      if msg:
4585
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4586
        continue
4587

    
4588
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4589

    
4590
      for name in utils.NiceSort(rows.keys()):
4591
        row = rows[name]
4592

    
4593
        out = []
4594

    
4595
        for field in self.op.output_fields:
4596
          if field == constants.SF_NODE:
4597
            val = node
4598
          elif field == constants.SF_TYPE:
4599
            val = self.op.storage_type
4600
          elif field in field_idx:
4601
            val = row[field_idx[field]]
4602
          else:
4603
            raise errors.ParameterError(field)
4604

    
4605
          out.append(val)
4606

    
4607
        result.append(out)
4608

    
4609
    return result
4610

    
4611

    
4612
class _InstanceQuery(_QueryBase):
4613
  FIELDS = query.INSTANCE_FIELDS
4614

    
4615
  def ExpandNames(self, lu):
4616
    lu.needed_locks = {}
4617
    lu.share_locks = _ShareAll()
4618

    
4619
    if self.names:
4620
      self.wanted = _GetWantedInstances(lu, self.names)
4621
    else:
4622
      self.wanted = locking.ALL_SET
4623

    
4624
    self.do_locking = (self.use_locking and
4625
                       query.IQ_LIVE in self.requested_data)
4626
    if self.do_locking:
4627
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4628
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4629
      lu.needed_locks[locking.LEVEL_NODE] = []
4630
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4631

    
4632
    self.do_grouplocks = (self.do_locking and
4633
                          query.IQ_NODES in self.requested_data)
4634

    
4635
  def DeclareLocks(self, lu, level):
4636
    if self.do_locking:
4637
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4638
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4639

    
4640
        # Lock all groups used by instances optimistically; this requires going
4641
        # via the node before it's locked, requiring verification later on
4642
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4643
          set(group_uuid
4644
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4645
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4646
      elif level == locking.LEVEL_NODE:
4647
        lu._LockInstancesNodes() # pylint: disable=W0212
4648

    
4649
  @staticmethod
4650
  def _CheckGroupLocks(lu):
4651
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4652
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4653

    
4654
    # Check if node groups for locked instances are still correct
4655
    for instance_name in owned_instances:
4656
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
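    # This re-check is needed because DeclareLocks computed the node groups
    # optimistically, before the group locks were actually held; if an
    # instance moved to a group we did not lock in the meantime, the check
    # above is expected to fail instead of silently using stale data.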
4657

    
4658
  def _GetQueryData(self, lu):
4659
    """Computes the list of instances and their attributes.
4660

4661
    """
4662
    if self.do_grouplocks:
4663
      self._CheckGroupLocks(lu)
4664

    
4665
    cluster = lu.cfg.GetClusterInfo()
4666
    all_info = lu.cfg.GetAllInstancesInfo()
4667

    
4668
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4669

    
4670
    instance_list = [all_info[name] for name in instance_names]
4671
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4672
                                        for inst in instance_list)))
4673
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4674
    bad_nodes = []
4675
    offline_nodes = []
4676
    wrongnode_inst = set()
4677

    
4678
    # Gather data as requested
4679
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4680
      live_data = {}
4681
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4682
      for name in nodes:
4683
        result = node_data[name]
4684
        if result.offline:
4685
          # offline nodes will be in both lists
4686
          assert result.fail_msg
4687
          offline_nodes.append(name)
4688
        if result.fail_msg:
4689
          bad_nodes.append(name)
4690
        elif result.payload:
4691
          for inst in result.payload:
4692
            if inst in all_info:
4693
              if all_info[inst].primary_node == name:
4694
                live_data.update(result.payload)
4695
              else:
4696
                wrongnode_inst.add(inst)
4697
            else:
4698
              # orphan instance; we don't list it here as we don't
4699
              # handle this case yet in the output of instance listing
4700
              logging.warning("Orphan instance '%s' found on node %s",
4701
                              inst, name)
4702
        # else no instance is alive
4703
    else:
4704
      live_data = {}
4705

    
4706
    if query.IQ_DISKUSAGE in self.requested_data:
4707
      disk_usage = dict((inst.name,
4708
                         _ComputeDiskSize(inst.disk_template,
4709
                                          [{constants.IDISK_SIZE: disk.size}
4710
                                           for disk in inst.disks]))
4711
                        for inst in instance_list)
4712
    else:
4713
      disk_usage = None
4714

    
4715
    if query.IQ_CONSOLE in self.requested_data:
4716
      consinfo = {}
4717
      for inst in instance_list:
4718
        if inst.name in live_data:
4719
          # Instance is running
4720
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4721
        else:
4722
          consinfo[inst.name] = None
4723
      assert set(consinfo.keys()) == set(instance_names)
4724
    else:
4725
      consinfo = None
4726

    
4727
    if query.IQ_NODES in self.requested_data:
4728
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4729
                                            instance_list)))
4730
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4731
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4732
                    for uuid in set(map(operator.attrgetter("group"),
4733
                                        nodes.values())))
4734
    else:
4735
      nodes = None
4736
      groups = None
4737

    
4738
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4739
                                   disk_usage, offline_nodes, bad_nodes,
4740
                                   live_data, wrongnode_inst, consinfo,
4741
                                   nodes, groups)
4742

    
4743

    
4744
class LUQuery(NoHooksLU):
4745
  """Query for resources/items of a certain kind.
4746

4747
  """
4748
  # pylint: disable=W0142
4749
  REQ_BGL = False
4750

    
4751
  def CheckArguments(self):
4752
    qcls = _GetQueryImplementation(self.op.what)
4753

    
4754
    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4755

    
4756
  def ExpandNames(self):
4757
    self.impl.ExpandNames(self)
4758

    
4759
  def DeclareLocks(self, level):
4760
    self.impl.DeclareLocks(self, level)
4761

    
4762
  def Exec(self, feedback_fn):
4763
    return self.impl.NewStyleQuery(self)
4764

    
4765

    
4766
class LUQueryFields(NoHooksLU):
4767
  """Query for resources/items of a certain kind.
4768

4769
  """
4770
  # pylint: disable=W0142
4771
  REQ_BGL = False
4772

    
4773
  def CheckArguments(self):
4774
    self.qcls = _GetQueryImplementation(self.op.what)
4775

    
4776
  def ExpandNames(self):
4777
    self.needed_locks = {}
4778

    
4779
  def Exec(self, feedback_fn):
4780
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4781

    
4782

    
4783
class LUNodeModifyStorage(NoHooksLU):
4784
  """Logical unit for modifying a storage volume on a node.
4785

4786
  """
4787
  REQ_BGL = False
4788

    
4789
  def CheckArguments(self):
4790
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4791

    
4792
    storage_type = self.op.storage_type
4793

    
4794
    try:
4795
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4796
    except KeyError:
4797
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4798
                                 " modified" % storage_type,
4799
                                 errors.ECODE_INVAL)
4800

    
4801
    diff = set(self.op.changes.keys()) - modifiable
4802
    if diff:
4803
      raise errors.OpPrereqError("The following fields can not be modified for"
4804
                                 " storage units of type '%s': %r" %
4805
                                 (storage_type, list(diff)),
4806
                                 errors.ECODE_INVAL)
4807

    
4808
  def ExpandNames(self):
4809
    self.needed_locks = {
4810
      locking.LEVEL_NODE: self.op.node_name,
4811
      }
4812

    
4813
  def Exec(self, feedback_fn):
4814
    """Computes the list of nodes and their attributes.
4815

4816
    """
4817
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4818
    result = self.rpc.call_storage_modify(self.op.node_name,
4819
                                          self.op.storage_type, st_args,
4820
                                          self.op.name, self.op.changes)
4821
    result.Raise("Failed to modify storage unit '%s' on %s" %
4822
                 (self.op.name, self.op.node_name))
4823

    
4824

    
4825
class LUNodeAdd(LogicalUnit):
4826
  """Logical unit for adding node to the cluster.
4827

4828
  """
4829
  HPATH = "node-add"
4830
  HTYPE = constants.HTYPE_NODE
4831
  _NFLAGS = ["master_capable", "vm_capable"]
4832

    
4833
  def CheckArguments(self):
4834
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4835
    # validate/normalize the node name
4836
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4837
                                         family=self.primary_ip_family)
4838
    self.op.node_name = self.hostname.name
4839

    
4840
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4841
      raise errors.OpPrereqError("Cannot readd the master node",
4842
                                 errors.ECODE_STATE)
4843

    
4844
    if self.op.readd and self.op.group:
4845
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4846
                                 " being readded", errors.ECODE_INVAL)
4847

    
4848
  def BuildHooksEnv(self):
4849
    """Build hooks env.
4850

4851
    This will run on all nodes before, and on all nodes + the new node after.
4852

4853
    """
4854
    return {
4855
      "OP_TARGET": self.op.node_name,
4856
      "NODE_NAME": self.op.node_name,
4857
      "NODE_PIP": self.op.primary_ip,
4858
      "NODE_SIP": self.op.secondary_ip,
4859
      "MASTER_CAPABLE": str(self.op.master_capable),
4860
      "VM_CAPABLE": str(self.op.vm_capable),
4861
      }
4862

    
4863
  def BuildHooksNodes(self):
4864
    """Build hooks nodes.
4865

4866
    """
4867
    # Exclude added node
4868
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4869
    post_nodes = pre_nodes + [self.op.node_name, ]
4870

    
4871
    return (pre_nodes, post_nodes)
4872

    
4873
  def CheckPrereq(self):
4874
    """Check prerequisites.
4875

4876
    This checks:
4877
     - the new node is not already in the config
4878
     - it is resolvable
4879
     - its parameters (single/dual homed) matches the cluster
4880

4881
    Any errors are signaled by raising errors.OpPrereqError.
4882

4883
    """
4884
    cfg = self.cfg
4885
    hostname = self.hostname
4886
    node = hostname.name
4887
    primary_ip = self.op.primary_ip = hostname.ip
4888
    if self.op.secondary_ip is None:
4889
      if self.primary_ip_family == netutils.IP6Address.family:
4890
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4891
                                   " IPv4 address must be given as secondary",
4892
                                   errors.ECODE_INVAL)
4893
      self.op.secondary_ip = primary_ip
4894

    
4895
    secondary_ip = self.op.secondary_ip
4896
    if not netutils.IP4Address.IsValid(secondary_ip):
4897
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4898
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4899

    
4900
    node_list = cfg.GetNodeList()
4901
    if not self.op.readd and node in node_list:
4902
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4903
                                 node, errors.ECODE_EXISTS)
4904
    elif self.op.readd and node not in node_list:
4905
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4906
                                 errors.ECODE_NOENT)
4907

    
4908
    self.changed_primary_ip = False
4909

    
4910
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4911
      if self.op.readd and node == existing_node_name:
4912
        if existing_node.secondary_ip != secondary_ip:
4913
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4914
                                     " address configuration as before",
4915
                                     errors.ECODE_INVAL)
4916
        if existing_node.primary_ip != primary_ip:
4917
          self.changed_primary_ip = True
4918

    
4919
        continue
4920

    
4921
      if (existing_node.primary_ip == primary_ip or
4922
          existing_node.secondary_ip == primary_ip or
4923
          existing_node.primary_ip == secondary_ip or
4924
          existing_node.secondary_ip == secondary_ip):
4925
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4926
                                   " existing node %s" % existing_node.name,
4927
                                   errors.ECODE_NOTUNIQUE)
4928

    
4929
    # After this 'if' block, None is no longer a valid value for the
4930
    # _capable op attributes
4931
    if self.op.readd:
4932
      old_node = self.cfg.GetNodeInfo(node)
4933
      assert old_node is not None, "Can't retrieve locked node %s" % node
4934
      for attr in self._NFLAGS:
4935
        if getattr(self.op, attr) is None:
4936
          setattr(self.op, attr, getattr(old_node, attr))
4937
    else:
4938
      for attr in self._NFLAGS:
4939
        if getattr(self.op, attr) is None:
4940
          setattr(self.op, attr, True)
4941

    
4942
    if self.op.readd and not self.op.vm_capable:
4943
      pri, sec = cfg.GetNodeInstances(node)
4944
      if pri or sec:
4945
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4946
                                   " flag set to false, but it already holds"
4947
                                   " instances" % node,
4948
                                   errors.ECODE_STATE)
4949

    
4950
    # check that the type of the node (single versus dual homed) is the
4951
    # same as for the master
4952
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4953
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4954
    newbie_singlehomed = secondary_ip == primary_ip
4955
    if master_singlehomed != newbie_singlehomed:
4956
      if master_singlehomed:
4957
        raise errors.OpPrereqError("The master has no secondary ip but the"
4958
                                   " new node has one",
4959
                                   errors.ECODE_INVAL)
4960
      else:
4961
        raise errors.OpPrereqError("The master has a secondary ip but the"
4962
                                   " new node doesn't have one",
4963
                                   errors.ECODE_INVAL)
4964

    
4965
    # checks reachability
4966
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4967
      raise errors.OpPrereqError("Node not reachable by ping",
4968
                                 errors.ECODE_ENVIRON)
4969

    
4970
    if not newbie_singlehomed:
4971
      # check reachability from my secondary ip to newbie's secondary ip
4972
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4973
                           source=myself.secondary_ip):
4974
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4975
                                   " based ping to node daemon port",
4976
                                   errors.ECODE_ENVIRON)
4977

    
4978
    if self.op.readd:
4979
      exceptions = [node]
4980
    else:
4981
      exceptions = []
4982

    
4983
    if self.op.master_capable:
4984
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4985
    else:
4986
      self.master_candidate = False
4987

    
4988
    if self.op.readd:
4989
      self.new_node = old_node
4990
    else:
4991
      node_group = cfg.LookupNodeGroup(self.op.group)
4992
      self.new_node = objects.Node(name=node,
4993
                                   primary_ip=primary_ip,
4994
                                   secondary_ip=secondary_ip,
4995
                                   master_candidate=self.master_candidate,
4996
                                   offline=False, drained=False,
4997
                                   group=node_group)
4998

    
4999
    if self.op.ndparams:
5000
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5001

    
5002
  def Exec(self, feedback_fn):
5003
    """Adds the new node to the cluster.
5004

5005
    """
5006
    new_node = self.new_node
5007
    node = new_node.name
5008

    
5009
    # We are adding a new node, so we assume it's powered
5010
    new_node.powered = True
5011

    
5012
    # for re-adds, reset the offline/drained/master-candidate flags;
5013
    # we need to reset here, otherwise offline would prevent RPC calls
5014
    # later in the procedure; this also means that if the re-add
5015
    # fails, we are left with a non-offlined, broken node
5016
    if self.op.readd:
5017
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5018
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5019
      # if we demote the node, we do cleanup later in the procedure
5020
      new_node.master_candidate = self.master_candidate
5021
      if self.changed_primary_ip:
5022
        new_node.primary_ip = self.op.primary_ip
5023

    
5024
    # copy the master/vm_capable flags
5025
    for attr in self._NFLAGS:
5026
      setattr(new_node, attr, getattr(self.op, attr))
5027

    
5028
    # notify the user about any possible mc promotion
5029
    if new_node.master_candidate:
5030
      self.LogInfo("Node will be a master candidate")
5031

    
5032
    if self.op.ndparams:
5033
      new_node.ndparams = self.op.ndparams
5034
    else:
5035
      new_node.ndparams = {}
5036

    
5037
    # check connectivity
5038
    result = self.rpc.call_version([node])[node]
5039
    result.Raise("Can't get version information from node %s" % node)
5040
    if constants.PROTOCOL_VERSION == result.payload:
5041
      logging.info("Communication to node %s fine, sw version %s match",
5042
                   node, result.payload)
5043
    else:
5044
      raise errors.OpExecError("Version mismatch master version %s,"
5045
                               " node version %s" %
5046
                               (constants.PROTOCOL_VERSION, result.payload))
5047

    
5048
    # Add node to our /etc/hosts, and add key to known_hosts
5049
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5050
      master_node = self.cfg.GetMasterNode()
5051
      result = self.rpc.call_etc_hosts_modify(master_node,
5052
                                              constants.ETC_HOSTS_ADD,
5053
                                              self.hostname.name,
5054
                                              self.hostname.ip)
5055
      result.Raise("Can't update hosts file with new host data")
5056

    
5057
    if new_node.secondary_ip != new_node.primary_ip:
5058
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5059
                               False)
5060

    
5061
    node_verify_list = [self.cfg.GetMasterNode()]
5062
    node_verify_param = {
5063
      constants.NV_NODELIST: [node],
5064
      # TODO: do a node-net-test as well?
5065
    }
5066

    
5067
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5068
                                       self.cfg.GetClusterName())
5069
    for verifier in node_verify_list:
5070
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5071
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5072
      if nl_payload:
5073
        for failed in nl_payload:
5074
          feedback_fn("ssh/hostname verification failed"
5075
                      " (checking from %s): %s" %
5076
                      (verifier, nl_payload[failed]))
5077
        raise errors.OpExecError("ssh/hostname verification failed")
5078

    
5079
    if self.op.readd:
5080
      _RedistributeAncillaryFiles(self)
5081
      self.context.ReaddNode(new_node)
5082
      # make sure we redistribute the config
5083
      self.cfg.Update(new_node, feedback_fn)
5084
      # and make sure the new node will not have old files around
5085
      if not new_node.master_candidate:
5086
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5087
        msg = result.fail_msg
5088
        if msg:
5089
          self.LogWarning("Node failed to demote itself from master"
5090
                          " candidate status: %s" % msg)
5091
    else:
5092
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5093
                                  additional_vm=self.op.vm_capable)
5094
      self.context.AddNode(new_node, self.proc.GetECId())
5095

    
5096

    
5097
class LUNodeSetParams(LogicalUnit):
5098
  """Modifies the parameters of a node.
5099

5100
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5101
      to the node role (as _ROLE_*)
5102
  @cvar _R2F: a dictionary from node role to tuples of flags
5103
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5104

5105
  """
5106
  HPATH = "node-modify"
5107
  HTYPE = constants.HTYPE_NODE
5108
  REQ_BGL = False
5109
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5110
  _F2R = {
5111
    (True, False, False): _ROLE_CANDIDATE,
5112
    (False, True, False): _ROLE_DRAINED,
5113
    (False, False, True): _ROLE_OFFLINE,
5114
    (False, False, False): _ROLE_REGULAR,
5115
    }
5116
  _R2F = dict((v, k) for k, v in _F2R.items())
5117
  _FLAGS = ["master_candidate", "drained", "offline"]
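  # Illustration of the two mappings above: a node whose flags are
  # (master_candidate=True, drained=False, offline=False) has the role
  # _ROLE_CANDIDATE, i.e. _F2R[(True, False, False)] == _ROLE_CANDIDATE,
  # and conversely _R2F[_ROLE_CANDIDATE] == (True, False, False).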
5118

    
5119
  def CheckArguments(self):
5120
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5121
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5122
                self.op.master_capable, self.op.vm_capable,
5123
                self.op.secondary_ip, self.op.ndparams]
5124
    if all_mods.count(None) == len(all_mods):
5125
      raise errors.OpPrereqError("Please pass at least one modification",
5126
                                 errors.ECODE_INVAL)
5127
    if all_mods.count(True) > 1:
5128
      raise errors.OpPrereqError("Can't set the node into more than one"
5129
                                 " state at the same time",
5130
                                 errors.ECODE_INVAL)
5131

    
5132
    # Boolean value that tells us whether we might be demoting from MC
5133
    self.might_demote = (self.op.master_candidate == False or
5134
                         self.op.offline == True or
5135
                         self.op.drained == True or
5136
                         self.op.master_capable == False)
5137

    
5138
    if self.op.secondary_ip:
5139
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5140
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5141
                                   " address" % self.op.secondary_ip,
5142
                                   errors.ECODE_INVAL)
5143

    
5144
    self.lock_all = self.op.auto_promote and self.might_demote
5145
    self.lock_instances = self.op.secondary_ip is not None
5146

    
5147
  def ExpandNames(self):
5148
    if self.lock_all:
5149
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5150
    else:
5151
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5152

    
5153
    if self.lock_instances:
5154
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5155

    
5156
  def DeclareLocks(self, level):
5157
    # If we have locked all instances, before waiting to lock nodes, release
5158
    # all the ones living on nodes unrelated to the current operation.
5159
    if level == locking.LEVEL_NODE and self.lock_instances:
5160
      self.affected_instances = []
5161
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5162
        instances_keep = []
5163

    
5164
        # Build list of instances to release
5165
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5166
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5167
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5168
              self.op.node_name in instance.all_nodes):
5169
            instances_keep.append(instance_name)
5170
            self.affected_instances.append(instance)
5171

    
5172
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5173

    
5174
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5175
                set(instances_keep))
5176

    
5177
  def BuildHooksEnv(self):
5178
    """Build hooks env.
5179

5180
    This runs on the master node.
5181

5182
    """
5183
    return {
5184
      "OP_TARGET": self.op.node_name,
5185
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5186
      "OFFLINE": str(self.op.offline),
5187
      "DRAINED": str(self.op.drained),
5188
      "MASTER_CAPABLE": str(self.op.master_capable),
5189
      "VM_CAPABLE": str(self.op.vm_capable),
5190
      }
5191

    
5192
  def BuildHooksNodes(self):
5193
    """Build hooks nodes.
5194

5195
    """
5196
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5197
    return (nl, nl)
5198

    
5199
  def CheckPrereq(self):
5200
    """Check prerequisites.
5201

5202
    This only checks the instance list against the existing names.
5203

5204
    """
5205
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5206

    
5207
    if (self.op.master_candidate is not None or
5208
        self.op.drained is not None or
5209
        self.op.offline is not None):
5210
      # we can't change the master's node flags
5211
      if self.op.node_name == self.cfg.GetMasterNode():
5212
        raise errors.OpPrereqError("The master role can be changed"
5213
                                   " only via master-failover",
5214
                                   errors.ECODE_INVAL)
5215

    
5216
    if self.op.master_candidate and not node.master_capable:
5217
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5218
                                 " it a master candidate" % node.name,
5219
                                 errors.ECODE_STATE)
5220

    
5221
    if self.op.vm_capable == False:
5222
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5223
      if ipri or isec:
5224
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5225
                                   " the vm_capable flag" % node.name,
5226
                                   errors.ECODE_STATE)
5227

    
5228
    if node.master_candidate and self.might_demote and not self.lock_all:
5229
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5230
      # check if after removing the current node, we're missing master
5231
      # candidates
5232
      (mc_remaining, mc_should, _) = \
5233
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5234
      if mc_remaining < mc_should:
5235
        raise errors.OpPrereqError("Not enough master candidates, please"
5236
                                   " pass auto promote option to allow"
5237
                                   " promotion", errors.ECODE_STATE)
5238

    
5239
    self.old_flags = old_flags = (node.master_candidate,
5240
                                  node.drained, node.offline)
5241
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5242
    self.old_role = old_role = self._F2R[old_flags]
5243

    
5244
    # Check for ineffective changes
5245
    for attr in self._FLAGS:
5246
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5247
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5248
        setattr(self.op, attr, None)
5249

    
5250
    # Past this point, any flag change to False means a transition
5251
    # away from the respective state, as only real changes are kept
5252

    
5253
    # TODO: We might query the real power state if it supports OOB
5254
    if _SupportsOob(self.cfg, node):
5255
      if self.op.offline is False and not (node.powered or
5256
                                           self.op.powered == True):
5257
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5258
                                    " offline status can be reset") %
5259
                                   self.op.node_name)
5260
    elif self.op.powered is not None:
5261
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5262
                                  " as it does not support out-of-band"
5263
                                  " handling") % self.op.node_name)
5264

    
5265
    # If we're being de-offlined or un-drained, we'll MC ourselves if needed
5266
    if (self.op.drained == False or self.op.offline == False or
5267
        (self.op.master_capable and not node.master_capable)):
5268
      if _DecideSelfPromotion(self):
5269
        self.op.master_candidate = True
5270
        self.LogInfo("Auto-promoting node to master candidate")
5271

    
5272
    # If we're no longer master capable, we'll demote ourselves from MC
5273
    if self.op.master_capable == False and node.master_candidate:
5274
      self.LogInfo("Demoting from master candidate")
5275
      self.op.master_candidate = False
5276

    
5277
    # Compute new role
5278
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5279
    if self.op.master_candidate:
5280
      new_role = self._ROLE_CANDIDATE
5281
    elif self.op.drained:
5282
      new_role = self._ROLE_DRAINED
5283
    elif self.op.offline:
5284
      new_role = self._ROLE_OFFLINE
5285
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5286
      # False is still in new flags, which means we're un-setting (the
5287
      # only) True flag
5288
      new_role = self._ROLE_REGULAR
5289
    else: # no new flags, nothing, keep old role
5290
      new_role = old_role
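    # Worked example of the chain above: draining a master candidate
    # (op.drained is True) yields _ROLE_DRAINED; clearing the only True flag
    # (e.g. op.offline set to False on an offline node) falls through to
    # _ROLE_REGULAR; with no flag changes at all the old role is kept.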
5291

    
5292
    self.new_role = new_role
5293

    
5294
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5295
      # Trying to transition out of offline status
5296
      result = self.rpc.call_version([node.name])[node.name]
5297
      if result.fail_msg:
5298
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5299
                                   " to report its version: %s" %
5300
                                   (node.name, result.fail_msg),
5301
                                   errors.ECODE_STATE)
5302
      else:
5303
        self.LogWarning("Transitioning node from offline to online state"
5304
                        " without using re-add. Please make sure the node"
5305
                        " is healthy!")
5306

    
5307
    if self.op.secondary_ip:
5308
      # Ok even without locking, because this can't be changed by any LU
5309
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5310
      master_singlehomed = master.secondary_ip == master.primary_ip
5311
      if master_singlehomed and self.op.secondary_ip:
5312
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5313
                                   " homed cluster", errors.ECODE_INVAL)
5314

    
5315
      if node.offline:
5316
        if self.affected_instances:
5317
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5318
                                     " node has instances (%s) configured"
5319
                                     " to use it" % self.affected_instances)
5320
      else:
5321
        # On online nodes, check that no instances are running, and that
5322
        # the node has the new ip and we can reach it.
5323
        for instance in self.affected_instances:
5324
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5325

    
5326
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5327
        if master.name != node.name:
5328
          # check reachability from master secondary ip to new secondary ip
5329
          if not netutils.TcpPing(self.op.secondary_ip,
5330
                                  constants.DEFAULT_NODED_PORT,
5331
                                  source=master.secondary_ip):
5332
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5333
                                       " based ping to node daemon port",
5334
                                       errors.ECODE_ENVIRON)
5335

    
5336
    if self.op.ndparams:
5337
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5338
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5339
      self.new_ndparams = new_ndparams
5340

    
5341
  def Exec(self, feedback_fn):
5342
    """Modifies a node.
5343

5344
    """
5345
    node = self.node
5346
    old_role = self.old_role
5347
    new_role = self.new_role
5348

    
5349
    result = []
5350

    
5351
    if self.op.ndparams:
5352
      node.ndparams = self.new_ndparams
5353

    
5354
    if self.op.powered is not None:
5355
      node.powered = self.op.powered
5356

    
5357
    for attr in ["master_capable", "vm_capable"]:
5358
      val = getattr(self.op, attr)
5359
      if val is not None:
5360
        setattr(node, attr, val)
5361
        result.append((attr, str(val)))
5362

    
5363
    if new_role != old_role:
5364
      # Tell the node to demote itself, if no longer MC and not offline
5365
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5366
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5367
        if msg:
5368
          self.LogWarning("Node failed to demote itself: %s", msg)
5369

    
5370
      new_flags = self._R2F[new_role]
5371
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5372
        if of != nf:
5373
          result.append((desc, str(nf)))
5374
      (node.master_candidate, node.drained, node.offline) = new_flags
5375

    
5376
      # we locked all nodes, we adjust the CP before updating this node
5377
      if self.lock_all:
5378
        _AdjustCandidatePool(self, [node.name])
5379

    
5380
    if self.op.secondary_ip:
5381
      node.secondary_ip = self.op.secondary_ip
5382
      result.append(("secondary_ip", self.op.secondary_ip))
5383

    
5384
    # this will trigger configuration file update, if needed
5385
    self.cfg.Update(node, feedback_fn)
5386

    
5387
    # this will trigger job queue propagation or cleanup if the mc
5388
    # flag changed
5389
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5390
      self.context.ReaddNode(node)
5391

    
5392
    return result
5393

    
5394

    
5395
class LUNodePowercycle(NoHooksLU):
5396
  """Powercycles a node.
5397

5398
  """
5399
  REQ_BGL = False
5400

    
5401
  def CheckArguments(self):
5402
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5403
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5404
      raise errors.OpPrereqError("The node is the master and the force"
5405
                                 " parameter was not set",
5406
                                 errors.ECODE_INVAL)
5407

    
5408
  def ExpandNames(self):
5409
    """Locking for PowercycleNode.
5410

5411
    This is a last-resort option and shouldn't block on other
5412
    jobs. Therefore, we grab no locks.
5413

5414
    """
5415
    self.needed_locks = {}
5416

    
5417
  def Exec(self, feedback_fn):
5418
    """Reboots a node.
5419

5420
    """
5421
    result = self.rpc.call_node_powercycle(self.op.node_name,
5422
                                           self.cfg.GetHypervisorType())
5423
    result.Raise("Failed to schedule the reboot")
5424
    return result.payload
5425

    
5426

    
5427
class LUClusterQuery(NoHooksLU):
5428
  """Query cluster configuration.
5429

5430
  """
5431
  REQ_BGL = False
5432

    
5433
  def ExpandNames(self):
5434
    self.needed_locks = {}
5435

    
5436
  def Exec(self, feedback_fn):
5437
    """Return cluster config.
5438

5439
    """
5440
    cluster = self.cfg.GetClusterInfo()
5441
    os_hvp = {}
5442

    
5443
    # Filter just for enabled hypervisors
5444
    for os_name, hv_dict in cluster.os_hvp.items():
5445
      os_hvp[os_name] = {}
5446
      for hv_name, hv_params in hv_dict.items():
5447
        if hv_name in cluster.enabled_hypervisors:
5448
          os_hvp[os_name][hv_name] = hv_params
5449

    
5450
    # Convert ip_family to ip_version
5451
    primary_ip_version = constants.IP4_VERSION
5452
    if cluster.primary_ip_family == netutils.IP6Address.family:
5453
      primary_ip_version = constants.IP6_VERSION
5454

    
5455
    result = {
5456
      "software_version": constants.RELEASE_VERSION,
5457
      "protocol_version": constants.PROTOCOL_VERSION,
5458
      "config_version": constants.CONFIG_VERSION,
5459
      "os_api_version": max(constants.OS_API_VERSIONS),
5460
      "export_version": constants.EXPORT_VERSION,
5461
      "architecture": (platform.architecture()[0], platform.machine()),
5462
      "name": cluster.cluster_name,
5463
      "master": cluster.master_node,
5464
      "default_hypervisor": cluster.enabled_hypervisors[0],
5465
      "enabled_hypervisors": cluster.enabled_hypervisors,
5466
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5467
                        for hypervisor_name in cluster.enabled_hypervisors]),
5468
      "os_hvp": os_hvp,
5469
      "beparams": cluster.beparams,
5470
      "osparams": cluster.osparams,
5471
      "nicparams": cluster.nicparams,
5472
      "ndparams": cluster.ndparams,
5473
      "candidate_pool_size": cluster.candidate_pool_size,
5474
      "master_netdev": cluster.master_netdev,
5475
      "volume_group_name": cluster.volume_group_name,
5476
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5477
      "file_storage_dir": cluster.file_storage_dir,
5478
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5479
      "maintain_node_health": cluster.maintain_node_health,
5480
      "ctime": cluster.ctime,
5481
      "mtime": cluster.mtime,
5482
      "uuid": cluster.uuid,
5483
      "tags": list(cluster.GetTags()),
5484
      "uid_pool": cluster.uid_pool,
5485
      "default_iallocator": cluster.default_iallocator,
5486
      "reserved_lvs": cluster.reserved_lvs,
5487
      "primary_ip_version": primary_ip_version,
5488
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5489
      "hidden_os": cluster.hidden_os,
5490
      "blacklisted_os": cluster.blacklisted_os,
5491
      }
5492

    
5493
    return result
5494

    
5495

    
5496
class LUClusterConfigQuery(NoHooksLU):
5497
  """Return configuration values.
5498

5499
  """
5500
  REQ_BGL = False
5501
  _FIELDS_DYNAMIC = utils.FieldSet()
5502
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5503
                                  "watcher_pause", "volume_group_name")
5504

    
5505
  def CheckArguments(self):
5506
    _CheckOutputFields(static=self._FIELDS_STATIC,
5507
                       dynamic=self._FIELDS_DYNAMIC,
5508
                       selected=self.op.output_fields)
5509

    
5510
  def ExpandNames(self):
5511
    self.needed_locks = {}
5512

    
5513
  def Exec(self, feedback_fn):
5514
    """Dump a representation of the cluster config to the standard output.
5515

5516
    """
5517
    values = []
5518
    for field in self.op.output_fields:
5519
      if field == "cluster_name":
5520
        entry = self.cfg.GetClusterName()
5521
      elif field == "master_node":
5522
        entry = self.cfg.GetMasterNode()
5523
      elif field == "drain_flag":
5524
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5525
      elif field == "watcher_pause":
5526
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5527
      elif field == "volume_group_name":
5528
        entry = self.cfg.GetVGName()
5529
      else:
5530
        raise errors.ParameterError(field)
5531
      values.append(entry)
5532
    return values
5533

    
5534

    
5535
class LUInstanceActivateDisks(NoHooksLU):
5536
  """Bring up an instance's disks.
5537

5538
  """
5539
  REQ_BGL = False
5540

    
5541
  def ExpandNames(self):
5542
    self._ExpandAndLockInstance()
5543
    self.needed_locks[locking.LEVEL_NODE] = []
5544
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5545

    
5546
  def DeclareLocks(self, level):
5547
    if level == locking.LEVEL_NODE:
5548
      self._LockInstancesNodes()
5549

    
5550
  def CheckPrereq(self):
5551
    """Check prerequisites.
5552

5553
    This checks that the instance is in the cluster.
5554

5555
    """
5556
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5557
    assert self.instance is not None, \
5558
      "Cannot retrieve locked instance %s" % self.op.instance_name
5559
    _CheckNodeOnline(self, self.instance.primary_node)
5560

    
5561
  def Exec(self, feedback_fn):
5562
    """Activate the disks.
5563

5564
    """
5565
    disks_ok, disks_info = \
5566
              _AssembleInstanceDisks(self, self.instance,
5567
                                     ignore_size=self.op.ignore_size)
5568
    if not disks_ok:
5569
      raise errors.OpExecError("Cannot activate block devices")
5570

    
5571
    return disks_info
5572

    
5573

    
5574
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5575
                           ignore_size=False):
5576
  """Prepare the block devices for an instance.
5577

5578
  This sets up the block devices on all nodes.
5579

5580
  @type lu: L{LogicalUnit}
5581
  @param lu: the logical unit on whose behalf we execute
5582
  @type instance: L{objects.Instance}
5583
  @param instance: the instance for whose disks we assemble
5584
  @type disks: list of L{objects.Disk} or None
5585
  @param disks: which disks to assemble (or all, if None)
5586
  @type ignore_secondaries: boolean
5587
  @param ignore_secondaries: if true, errors on secondary nodes
5588
      won't result in an error return from the function
5589
  @type ignore_size: boolean
5590
  @param ignore_size: if true, the current known size of the disk
5591
      will not be used during the disk activation, useful for cases
5592
      when the size is wrong
5593
  @return: False if the operation failed, otherwise a list of
5594
      (host, instance_visible_name, node_visible_name)
5595
      with the mapping from node devices to instance devices
5596

5597
  """
5598
  device_info = []
5599
  disks_ok = True
5600
  iname = instance.name
5601
  disks = _ExpandCheckDisks(instance, disks)
5602

    
5603
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)
5611

    
5612
  # 1st pass, assemble on all nodes in secondary mode
5613
  for idx, inst_disk in enumerate(disks):
5614
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5615
      if ignore_size:
5616
        node_disk = node_disk.Copy()
5617
        node_disk.UnsetSize()
5618
      lu.cfg.SetDiskID(node_disk, node)
5619
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5620
      msg = result.fail_msg
5621
      if msg:
5622
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5623
                           " (is_primary=False, pass=1): %s",
5624
                           inst_disk.iv_name, node, msg)
5625
        if not ignore_secondaries:
5626
          disks_ok = False
5627

    
5628
  # FIXME: race condition on drbd migration to primary
5629

    
5630
  # 2nd pass, do only the primary node
5631
  for idx, inst_disk in enumerate(disks):
5632
    dev_path = None
5633

    
5634
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5635
      if node != instance.primary_node:
5636
        continue
5637
      if ignore_size:
5638
        node_disk = node_disk.Copy()
5639
        node_disk.UnsetSize()
5640
      lu.cfg.SetDiskID(node_disk, node)
5641
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5642
      msg = result.fail_msg
5643
      if msg:
5644
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5645
                           " (is_primary=True, pass=2): %s",
5646
                           inst_disk.iv_name, node, msg)
5647
        disks_ok = False
5648
      else:
5649
        dev_path = result.payload
5650

    
5651
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5652

    
5653
  # leave the disks configured for the primary node
5654
  # this is a workaround that would be better fixed by
  # improving the logical/physical id handling
5656
  for disk in disks:
5657
    lu.cfg.SetDiskID(disk, instance.primary_node)
5658

    
5659
  return disks_ok, device_info
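
# Editorial sketch (illustrative only): a typical caller, like
# LUInstanceActivateDisks above, checks the boolean and then works with the
# (node, iv_name, device_path) triples; "lu" and "instance" are assumed to
# come from a LogicalUnit context.
#
#   disks_ok, dev_info = _AssembleInstanceDisks(lu, instance, ignore_size=True)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in dev_info:
#     lu.LogInfo("Disk %s visible on node %s as %s", iv_name, node, dev_path)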


def _StartInstanceDisks(lu, instance, force):
5663
  """Start the disks of an instance.
5664

5665
  """
5666
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5667
                                           ignore_secondaries=force)
5668
  if not disks_ok:
5669
    _ShutdownInstanceDisks(lu, instance)
5670
    if force is not None and not force:
5671
      lu.proc.LogWarning("", hint="If the message above refers to a"
5672
                         " secondary node,"
5673
                         " you can retry the operation using '--force'.")
5674
    raise errors.OpExecError("Disk consistency error")
5675

    
5676

    
5677
class LUInstanceDeactivateDisks(NoHooksLU):
5678
  """Shutdown an instance's disks.
5679

5680
  """
5681
  REQ_BGL = False
5682

    
5683
  def ExpandNames(self):
5684
    self._ExpandAndLockInstance()
5685
    self.needed_locks[locking.LEVEL_NODE] = []
5686
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5687

    
5688
  def DeclareLocks(self, level):
5689
    if level == locking.LEVEL_NODE:
5690
      self._LockInstancesNodes()
5691

    
5692
  def CheckPrereq(self):
5693
    """Check prerequisites.
5694

5695
    This checks that the instance is in the cluster.
5696

5697
    """
5698
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5699
    assert self.instance is not None, \
5700
      "Cannot retrieve locked instance %s" % self.op.instance_name
5701

    
5702
  def Exec(self, feedback_fn):
5703
    """Deactivate the disks
5704

5705
    """
5706
    instance = self.instance
5707
    if self.op.force:
5708
      _ShutdownInstanceDisks(self, instance)
5709
    else:
5710
      _SafeShutdownInstanceDisks(self, instance)
5711

    
5712

    
5713
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5743
  """Shutdown block devices of an instance.
5744

5745
  This does the shutdown on all nodes of the instance.
5746

5747
  If ignore_primary is false, errors on the primary node are not
  ignored; they cause the function to return False.
5749

5750
  """
5751
  all_result = True
5752
  disks = _ExpandCheckDisks(instance, disks)
5753

    
5754
  for disk in disks:
5755
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5756
      lu.cfg.SetDiskID(top_disk, node)
5757
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5758
      msg = result.fail_msg
5759
      if msg:
5760
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5761
                      disk.iv_name, node, msg)
5762
        if ((node == instance.primary_node and not ignore_primary) or
5763
            (node != instance.primary_node and not result.offline)):
5764
          all_result = False
5765
  return all_result
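
# Editorial sketch (illustrative only): callers that must tear down disks even
# if the primary node reports errors can pass ignore_primary=True and inspect
# the aggregated boolean; "lu" and "instance" are assumed to come from a
# LogicalUnit context.
#
#   if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
#     lu.LogWarning("Could not shut down all block devices of instance %s",
#                   instance.name)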


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5769
  """Checks if a node has enough free memory.
5770

5771
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5775

5776
  @type lu: C{LogicalUnit}
5777
  @param lu: a logical unit from which we get configuration data
5778
  @type node: C{str}
5779
  @param node: the node to check
5780
  @type reason: C{str}
5781
  @param reason: string to use in the error message
5782
  @type requested: C{int}
5783
  @param requested: the amount of memory in MiB to check for
5784
  @type hypervisor_name: C{str}
5785
  @param hypervisor_name: the hypervisor to ask for memory stats
5786
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5787
      we cannot check the node
5788

5789
  """
5790
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5791
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5792
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5793
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5794
  if not isinstance(free_mem, int):
5795
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5796
                               " was '%s'" % (node, free_mem),
5797
                               errors.ECODE_ENVIRON)
5798
  if requested > free_mem:
5799
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5800
                               " needed %s MiB, available %s MiB" %
5801
                               (node, reason, requested, free_mem),
5802
                               errors.ECODE_NORES)
5803

    
5804

    
5805
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5806
  """Checks if nodes have enough free disk space in the all VGs.
5807

5808
  This function check if all given nodes have the needed amount of
5809
  free disk. In case any node has less disk or we cannot get the
5810
  information from the node, this function raise an OpPrereqError
5811
  exception.
5812

5813
  @type lu: C{LogicalUnit}
5814
  @param lu: a logical unit from which we get configuration data
5815
  @type nodenames: C{list}
5816
  @param nodenames: the list of node names to check
5817
  @type req_sizes: C{dict}
5818
  @param req_sizes: the hash of vg and corresponding amount of disk in
5819
      MiB to check for
5820
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5821
      or we cannot check the node
5822

5823
  """
5824
  for vg, req_size in req_sizes.items():
5825
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
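
# Editorial sketch (illustrative only): req_sizes maps a volume group name to
# the amount of space needed in it, in MiB.  The node names and the "xenvg"
# group below are made-up examples.
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 1024})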


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5829
  """Checks if nodes have enough free disk space in the specified VG.
5830

5831
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5835

5836
  @type lu: C{LogicalUnit}
5837
  @param lu: a logical unit from which we get configuration data
5838
  @type nodenames: C{list}
5839
  @param nodenames: the list of node names to check
5840
  @type vg: C{str}
5841
  @param vg: the volume group to check
5842
  @type requested: C{int}
5843
  @param requested: the amount of disk in MiB to check for
5844
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5845
      or we cannot check the node
5846

5847
  """
5848
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5849
  for node in nodenames:
5850
    info = nodeinfo[node]
5851
    info.Raise("Cannot get current information from node %s" % node,
5852
               prereq=True, ecode=errors.ECODE_ENVIRON)
5853
    vg_free = info.payload.get("vg_free", None)
5854
    if not isinstance(vg_free, int):
5855
      raise errors.OpPrereqError("Can't compute free disk space on node"
5856
                                 " %s for vg %s, result was '%s'" %
5857
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5858
    if requested > vg_free:
5859
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5860
                                 " vg %s: required %d MiB, available %d MiB" %
5861
                                 (node, vg, requested, vg_free),
5862
                                 errors.ECODE_NORES)
5863

    
5864

    
5865
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
5866
  """Checks if nodes have enough physical CPUs
5867

5868
  This function checks if all given nodes have the needed number of
5869
  physical CPUs. In case any node has less CPUs or we cannot get the
5870
  information from the node, this function raises an OpPrereqError
5871
  exception.
5872

5873
  @type lu: C{LogicalUnit}
5874
  @param lu: a logical unit from which we get configuration data
5875
  @type nodenames: C{list}
5876
  @param nodenames: the list of node names to check
5877
  @type requested: C{int}
5878
  @param requested: the minimum acceptable number of physical CPUs
5879
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
5880
      or we cannot check the node
5881

5882
  """
5883
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
5884
  for node in nodenames:
5885
    info = nodeinfo[node]
5886
    info.Raise("Cannot get current information from node %s" % node,
5887
               prereq=True, ecode=errors.ECODE_ENVIRON)
5888
    num_cpus = info.payload.get("cpu_total", None)
5889
    if not isinstance(num_cpus, int):
5890
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
5891
                                 " on node %s, result was '%s'" %
5892
                                 (node, num_cpus), errors.ECODE_ENVIRON)
5893
    if requested > num_cpus:
5894
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
5895
                                 "required" % (node, num_cpus, requested),
5896
                                 errors.ECODE_NORES)
5897

    
5898

    
5899
class LUInstanceStartup(LogicalUnit):
5900
  """Starts an instance.
5901

5902
  """
5903
  HPATH = "instance-start"
5904
  HTYPE = constants.HTYPE_INSTANCE
5905
  REQ_BGL = False
5906

    
5907
  def CheckArguments(self):
5908
    # extra beparams
5909
    if self.op.beparams:
5910
      # fill the beparams dict
5911
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5912

    
5913
  def ExpandNames(self):
5914
    self._ExpandAndLockInstance()
5915

    
5916
  def BuildHooksEnv(self):
5917
    """Build hooks env.
5918

5919
    This runs on master, primary and secondary nodes of the instance.
5920

5921
    """
5922
    env = {
5923
      "FORCE": self.op.force,
5924
      }
5925

    
5926
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5927

    
5928
    return env
5929

    
5930
  def BuildHooksNodes(self):
5931
    """Build hooks nodes.
5932

5933
    """
5934
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5935
    return (nl, nl)
5936

    
5937
  def CheckPrereq(self):
5938
    """Check prerequisites.
5939

5940
    This checks that the instance is in the cluster.
5941

5942
    """
5943
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5944
    assert self.instance is not None, \
5945
      "Cannot retrieve locked instance %s" % self.op.instance_name
5946

    
5947
    # extra hvparams
5948
    if self.op.hvparams:
5949
      # check hypervisor parameter syntax (locally)
5950
      cluster = self.cfg.GetClusterInfo()
5951
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5952
      filled_hvp = cluster.FillHV(instance)
5953
      filled_hvp.update(self.op.hvparams)
5954
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5955
      hv_type.CheckParameterSyntax(filled_hvp)
5956
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5957

    
5958
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5959

    
5960
    if self.primary_offline and self.op.ignore_offline_nodes:
5961
      self.proc.LogWarning("Ignoring offline primary node")
5962

    
5963
      if self.op.hvparams or self.op.beparams:
5964
        self.proc.LogWarning("Overridden parameters are ignored")
5965
    else:
5966
      _CheckNodeOnline(self, instance.primary_node)
5967

    
5968
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5969

    
5970
      # check bridges existence
5971
      _CheckInstanceBridgesExist(self, instance)
5972

    
5973
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5974
                                                instance.name,
5975
                                                instance.hypervisor)
5976
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5977
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5978
      if not remote_info.payload: # not running already
5979
        _CheckNodeFreeMemory(self, instance.primary_node,
5980
                             "starting instance %s" % instance.name,
5981
                             bep[constants.BE_MEMORY], instance.hypervisor)
5982

    
5983
  def Exec(self, feedback_fn):
5984
    """Start the instance.
5985

5986
    """
5987
    instance = self.instance
5988
    force = self.op.force
5989

    
5990
    if not self.op.no_remember:
5991
      self.cfg.MarkInstanceUp(instance.name)
5992

    
5993
    if self.primary_offline:
5994
      assert self.op.ignore_offline_nodes
5995
      self.proc.LogInfo("Primary node offline, marked instance as started")
5996
    else:
5997
      node_current = instance.primary_node
5998

    
5999
      _StartInstanceDisks(self, instance, force)
6000

    
6001
      result = self.rpc.call_instance_start(node_current, instance,
6002
                                            self.op.hvparams, self.op.beparams,
6003
                                            self.op.startup_paused)
6004
      msg = result.fail_msg
6005
      if msg:
6006
        _ShutdownInstanceDisks(self, instance)
6007
        raise errors.OpExecError("Could not start instance: %s" % msg)
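
# Editorial sketch (illustrative only): the hvparams/beparams accepted by this
# LU are temporary overrides for this particular start and are not written to
# the configuration.  Assuming the usual LU <-> OpCode naming, a caller could
# ask for a one-off memory bump like this (the instance name is made up):
#
#   op = opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                                  beparams={constants.BE_MEMORY: 1024},
#                                  hvparams={}, force=False)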


class LUInstanceReboot(LogicalUnit):
6011
  """Reboot an instance.
6012

6013
  """
6014
  HPATH = "instance-reboot"
6015
  HTYPE = constants.HTYPE_INSTANCE
6016
  REQ_BGL = False
6017

    
6018
  def ExpandNames(self):
6019
    self._ExpandAndLockInstance()
6020

    
6021
  def BuildHooksEnv(self):
6022
    """Build hooks env.
6023

6024
    This runs on master, primary and secondary nodes of the instance.
6025

6026
    """
6027
    env = {
6028
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6029
      "REBOOT_TYPE": self.op.reboot_type,
6030
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6031
      }
6032

    
6033
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6034

    
6035
    return env
6036

    
6037
  def BuildHooksNodes(self):
6038
    """Build hooks nodes.
6039

6040
    """
6041
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6042
    return (nl, nl)
6043

    
6044
  def CheckPrereq(self):
6045
    """Check prerequisites.
6046

6047
    This checks that the instance is in the cluster.
6048

6049
    """
6050
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6051
    assert self.instance is not None, \
6052
      "Cannot retrieve locked instance %s" % self.op.instance_name
6053

    
6054
    _CheckNodeOnline(self, instance.primary_node)
6055

    
6056
    # check bridges existence
6057
    _CheckInstanceBridgesExist(self, instance)
6058

    
6059
  def Exec(self, feedback_fn):
6060
    """Reboot the instance.
6061

6062
    """
6063
    instance = self.instance
6064
    ignore_secondaries = self.op.ignore_secondaries
6065
    reboot_type = self.op.reboot_type
6066

    
6067
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6068
                                              instance.name,
6069
                                              instance.hypervisor)
6070
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6071
    instance_running = bool(remote_info.payload)
6072

    
6073
    node_current = instance.primary_node
6074

    
6075
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6076
                                            constants.INSTANCE_REBOOT_HARD]:
6077
      for disk in instance.disks:
6078
        self.cfg.SetDiskID(disk, node_current)
6079
      result = self.rpc.call_instance_reboot(node_current, instance,
6080
                                             reboot_type,
6081
                                             self.op.shutdown_timeout)
6082
      result.Raise("Could not reboot instance")
6083
    else:
6084
      if instance_running:
6085
        result = self.rpc.call_instance_shutdown(node_current, instance,
6086
                                                 self.op.shutdown_timeout)
6087
        result.Raise("Could not shutdown instance for full reboot")
6088
        _ShutdownInstanceDisks(self, instance)
6089
      else:
6090
        self.LogInfo("Instance %s was already stopped, starting now",
6091
                     instance.name)
6092
      _StartInstanceDisks(self, instance, ignore_secondaries)
6093
      result = self.rpc.call_instance_start(node_current, instance,
6094
                                            None, None, False)
6095
      msg = result.fail_msg
6096
      if msg:
6097
        _ShutdownInstanceDisks(self, instance)
6098
        raise errors.OpExecError("Could not start instance for"
6099
                                 " full reboot: %s" % msg)
6100

    
6101
    self.cfg.MarkInstanceUp(instance.name)
6102

    
6103

    
6104
class LUInstanceShutdown(LogicalUnit):
6105
  """Shutdown an instance.
6106

6107
  """
6108
  HPATH = "instance-stop"
6109
  HTYPE = constants.HTYPE_INSTANCE
6110
  REQ_BGL = False
6111

    
6112
  def ExpandNames(self):
6113
    self._ExpandAndLockInstance()
6114

    
6115
  def BuildHooksEnv(self):
6116
    """Build hooks env.
6117

6118
    This runs on master, primary and secondary nodes of the instance.
6119

6120
    """
6121
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6122
    env["TIMEOUT"] = self.op.timeout
6123
    return env
6124

    
6125
  def BuildHooksNodes(self):
6126
    """Build hooks nodes.
6127

6128
    """
6129
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6130
    return (nl, nl)
6131

    
6132
  def CheckPrereq(self):
6133
    """Check prerequisites.
6134

6135
    This checks that the instance is in the cluster.
6136

6137
    """
6138
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6139
    assert self.instance is not None, \
6140
      "Cannot retrieve locked instance %s" % self.op.instance_name
6141

    
6142
    self.primary_offline = \
6143
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6144

    
6145
    if self.primary_offline and self.op.ignore_offline_nodes:
6146
      self.proc.LogWarning("Ignoring offline primary node")
6147
    else:
6148
      _CheckNodeOnline(self, self.instance.primary_node)
6149

    
6150
  def Exec(self, feedback_fn):
6151
    """Shutdown the instance.
6152

6153
    """
6154
    instance = self.instance
6155
    node_current = instance.primary_node
6156
    timeout = self.op.timeout
6157

    
6158
    if not self.op.no_remember:
6159
      self.cfg.MarkInstanceDown(instance.name)
6160

    
6161
    if self.primary_offline:
6162
      assert self.op.ignore_offline_nodes
6163
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6164
    else:
6165
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6166
      msg = result.fail_msg
6167
      if msg:
6168
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6169

    
6170
      _ShutdownInstanceDisks(self, instance)
6171

    
6172

    
6173
class LUInstanceReinstall(LogicalUnit):
6174
  """Reinstall an instance.
6175

6176
  """
6177
  HPATH = "instance-reinstall"
6178
  HTYPE = constants.HTYPE_INSTANCE
6179
  REQ_BGL = False
6180

    
6181
  def ExpandNames(self):
6182
    self._ExpandAndLockInstance()
6183

    
6184
  def BuildHooksEnv(self):
6185
    """Build hooks env.
6186

6187
    This runs on master, primary and secondary nodes of the instance.
6188

6189
    """
6190
    return _BuildInstanceHookEnvByObject(self, self.instance)
6191

    
6192
  def BuildHooksNodes(self):
6193
    """Build hooks nodes.
6194

6195
    """
6196
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6197
    return (nl, nl)
6198

    
6199
  def CheckPrereq(self):
6200
    """Check prerequisites.
6201

6202
    This checks that the instance is in the cluster and is not running.
6203

6204
    """
6205
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6206
    assert instance is not None, \
6207
      "Cannot retrieve locked instance %s" % self.op.instance_name
6208
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6209
                     " offline, cannot reinstall")
6210
    for node in instance.secondary_nodes:
6211
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6212
                       " cannot reinstall")
6213

    
6214
    if instance.disk_template == constants.DT_DISKLESS:
6215
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6216
                                 self.op.instance_name,
6217
                                 errors.ECODE_INVAL)
6218
    _CheckInstanceDown(self, instance, "cannot reinstall")
6219

    
6220
    if self.op.os_type is not None:
6221
      # OS verification
6222
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6223
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6224
      instance_os = self.op.os_type
6225
    else:
6226
      instance_os = instance.os
6227

    
6228
    nodelist = list(instance.all_nodes)
6229

    
6230
    if self.op.osparams:
6231
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6232
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6233
      self.os_inst = i_osdict # the new dict (without defaults)
6234
    else:
6235
      self.os_inst = None
6236

    
6237
    self.instance = instance
6238

    
6239
  def Exec(self, feedback_fn):
6240
    """Reinstall the instance.
6241

6242
    """
6243
    inst = self.instance
6244

    
6245
    if self.op.os_type is not None:
6246
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6247
      inst.os = self.op.os_type
6248
      # Write to configuration
6249
      self.cfg.Update(inst, feedback_fn)
6250

    
6251
    _StartInstanceDisks(self, inst, None)
6252
    try:
6253
      feedback_fn("Running the instance OS create scripts...")
6254
      # FIXME: pass debug option from opcode to backend
6255
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6256
                                             self.op.debug_level,
6257
                                             osparams=self.os_inst)
6258
      result.Raise("Could not install OS for instance %s on node %s" %
6259
                   (inst.name, inst.primary_node))
6260
    finally:
6261
      _ShutdownInstanceDisks(self, inst)
6262

    
6263

    
6264
class LUInstanceRecreateDisks(LogicalUnit):
6265
  """Recreate an instance's missing disks.
6266

6267
  """
6268
  HPATH = "instance-recreate-disks"
6269
  HTYPE = constants.HTYPE_INSTANCE
6270
  REQ_BGL = False
6271

    
6272
  def CheckArguments(self):
6273
    # normalise the disk list
6274
    self.op.disks = sorted(frozenset(self.op.disks))
6275

    
6276
  def ExpandNames(self):
6277
    self._ExpandAndLockInstance()
6278
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6279
    if self.op.nodes:
6280
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6281
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6282
    else:
6283
      self.needed_locks[locking.LEVEL_NODE] = []
6284

    
6285
  def DeclareLocks(self, level):
6286
    if level == locking.LEVEL_NODE:
6287
      # if we replace the nodes, we only need to lock the old primary,
6288
      # otherwise we need to lock all nodes for disk re-creation
6289
      primary_only = bool(self.op.nodes)
6290
      self._LockInstancesNodes(primary_only=primary_only)
6291

    
6292
  def BuildHooksEnv(self):
6293
    """Build hooks env.
6294

6295
    This runs on master, primary and secondary nodes of the instance.
6296

6297
    """
6298
    return _BuildInstanceHookEnvByObject(self, self.instance)
6299

    
6300
  def BuildHooksNodes(self):
6301
    """Build hooks nodes.
6302

6303
    """
6304
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6305
    return (nl, nl)
6306

    
6307
  def CheckPrereq(self):
6308
    """Check prerequisites.
6309

6310
    This checks that the instance is in the cluster and is not running.
6311

6312
    """
6313
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314
    assert instance is not None, \
6315
      "Cannot retrieve locked instance %s" % self.op.instance_name
6316
    if self.op.nodes:
6317
      if len(self.op.nodes) != len(instance.all_nodes):
6318
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6319
                                   " %d replacement nodes were specified" %
6320
                                   (instance.name, len(instance.all_nodes),
6321
                                    len(self.op.nodes)),
6322
                                   errors.ECODE_INVAL)
6323
      assert instance.disk_template != constants.DT_DRBD8 or \
6324
          len(self.op.nodes) == 2
6325
      assert instance.disk_template != constants.DT_PLAIN or \
6326
          len(self.op.nodes) == 1
6327
      primary_node = self.op.nodes[0]
6328
    else:
6329
      primary_node = instance.primary_node
6330
    _CheckNodeOnline(self, primary_node)
6331

    
6332
    if instance.disk_template == constants.DT_DISKLESS:
6333
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6334
                                 self.op.instance_name, errors.ECODE_INVAL)
6335
    # if we replace nodes *and* the old primary is offline, we don't
6336
    # check
6337
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6338
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6339
    if not (self.op.nodes and old_pnode.offline):
6340
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6341

    
6342
    if not self.op.disks:
6343
      self.op.disks = range(len(instance.disks))
6344
    else:
6345
      for idx in self.op.disks:
6346
        if idx >= len(instance.disks):
6347
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6348
                                     errors.ECODE_INVAL)
6349
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6350
      raise errors.OpPrereqError("Can't recreate disks partially and"
6351
                                 " change the nodes at the same time",
6352
                                 errors.ECODE_INVAL)
6353
    self.instance = instance
6354

    
6355
  def Exec(self, feedback_fn):
6356
    """Recreate the disks.
6357

6358
    """
6359
    instance = self.instance
6360

    
6361
    to_skip = []
6362
    mods = [] # keeps track of needed logical_id changes
6363

    
6364
    for idx, disk in enumerate(instance.disks):
6365
      if idx not in self.op.disks: # disk idx has not been passed in
6366
        to_skip.append(idx)
6367
        continue
6368
      # update secondaries for disks, if needed
6369
      if self.op.nodes:
6370
        if disk.dev_type == constants.LD_DRBD8:
6371
          # need to update the nodes and minors
6372
          assert len(self.op.nodes) == 2
6373
          assert len(disk.logical_id) == 6 # otherwise disk internals
6374
                                           # have changed
6375
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6376
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6377
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6378
                    new_minors[0], new_minors[1], old_secret)
6379
          assert len(disk.logical_id) == len(new_id)
6380
          mods.append((idx, new_id))
6381

    
6382
    # now that we have passed all asserts above, we can apply the mods
6383
    # in a single run (to avoid partial changes)
6384
    for idx, new_id in mods:
6385
      instance.disks[idx].logical_id = new_id
6386

    
6387
    # change primary node, if needed
6388
    if self.op.nodes:
6389
      instance.primary_node = self.op.nodes[0]
6390
      self.LogWarning("Changing the instance's nodes, you will have to"
6391
                      " remove any disks left on the older nodes manually")
6392

    
6393
    if self.op.nodes:
6394
      self.cfg.Update(instance, feedback_fn)
6395

    
6396
    _CreateDisks(self, instance, to_skip=to_skip)
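
# Editorial note (illustrative only): for DRBD8 disks the logical_id handled
# in Exec above is a 6-tuple
#
#   (node_a, node_b, port, minor_a, minor_b, secret)
#
# of which only the node names and the freshly allocated minors are replaced
# when moving to new nodes; the port and the shared secret are kept from the
# old disk.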


class LUInstanceRename(LogicalUnit):
6400
  """Rename an instance.
6401

6402
  """
6403
  HPATH = "instance-rename"
6404
  HTYPE = constants.HTYPE_INSTANCE
6405

    
6406
  def CheckArguments(self):
6407
    """Check arguments.
6408

6409
    """
6410
    if self.op.ip_check and not self.op.name_check:
6411
      # TODO: make the ip check more flexible and not depend on the name check
6412
      raise errors.OpPrereqError("IP address check requires a name check",
6413
                                 errors.ECODE_INVAL)
6414

    
6415
  def BuildHooksEnv(self):
6416
    """Build hooks env.
6417

6418
    This runs on master, primary and secondary nodes of the instance.
6419

6420
    """
6421
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6422
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6423
    return env
6424

    
6425
  def BuildHooksNodes(self):
6426
    """Build hooks nodes.
6427

6428
    """
6429
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6430
    return (nl, nl)
6431

    
6432
  def CheckPrereq(self):
6433
    """Check prerequisites.
6434

6435
    This checks that the instance is in the cluster and is not running.
6436

6437
    """
6438
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6439
                                                self.op.instance_name)
6440
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6441
    assert instance is not None
6442
    _CheckNodeOnline(self, instance.primary_node)
6443
    _CheckInstanceDown(self, instance, "cannot rename")
6444
    self.instance = instance
6445

    
6446
    new_name = self.op.new_name
6447
    if self.op.name_check:
6448
      hostname = netutils.GetHostname(name=new_name)
6449
      if hostname != new_name:
6450
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6451
                     hostname.name)
6452
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6453
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6454
                                    " same as given hostname '%s'") %
6455
                                    (hostname.name, self.op.new_name),
6456
                                    errors.ECODE_INVAL)
6457
      new_name = self.op.new_name = hostname.name
6458
      if (self.op.ip_check and
6459
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6460
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6461
                                   (hostname.ip, new_name),
6462
                                   errors.ECODE_NOTUNIQUE)
6463

    
6464
    instance_list = self.cfg.GetInstanceList()
6465
    if new_name in instance_list and new_name != instance.name:
6466
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6467
                                 new_name, errors.ECODE_EXISTS)
6468

    
6469
  def Exec(self, feedback_fn):
6470
    """Rename the instance.
6471

6472
    """
6473
    inst = self.instance
6474
    old_name = inst.name
6475

    
6476
    rename_file_storage = False
6477
    if (inst.disk_template in constants.DTS_FILEBASED and
6478
        self.op.new_name != inst.name):
6479
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6480
      rename_file_storage = True
6481

    
6482
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6483
    # Change the instance lock. This is definitely safe while we hold the BGL.
6484
    # Otherwise the new lock would have to be added in acquired mode.
6485
    assert self.REQ_BGL
6486
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6487
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6488

    
6489
    # re-read the instance from the configuration after rename
6490
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6491

    
6492
    if rename_file_storage:
6493
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6494
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6495
                                                     old_file_storage_dir,
6496
                                                     new_file_storage_dir)
6497
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6498
                   " (but the instance has been renamed in Ganeti)" %
6499
                   (inst.primary_node, old_file_storage_dir,
6500
                    new_file_storage_dir))
6501

    
6502
    _StartInstanceDisks(self, inst, None)
6503
    try:
6504
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6505
                                                 old_name, self.op.debug_level)
6506
      msg = result.fail_msg
6507
      if msg:
6508
        msg = ("Could not run OS rename script for instance %s on node %s"
6509
               " (but the instance has been renamed in Ganeti): %s" %
6510
               (inst.name, inst.primary_node, msg))
6511
        self.proc.LogWarning(msg)
6512
    finally:
6513
      _ShutdownInstanceDisks(self, inst)
6514

    
6515
    return inst.name
6516

    
6517

    
6518
class LUInstanceRemove(LogicalUnit):
6519
  """Remove an instance.
6520

6521
  """
6522
  HPATH = "instance-remove"
6523
  HTYPE = constants.HTYPE_INSTANCE
6524
  REQ_BGL = False
6525

    
6526
  def ExpandNames(self):
6527
    self._ExpandAndLockInstance()
6528
    self.needed_locks[locking.LEVEL_NODE] = []
6529
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6530

    
6531
  def DeclareLocks(self, level):
6532
    if level == locking.LEVEL_NODE:
6533
      self._LockInstancesNodes()
6534

    
6535
  def BuildHooksEnv(self):
6536
    """Build hooks env.
6537

6538
    This runs on master, primary and secondary nodes of the instance.
6539

6540
    """
6541
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6542
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6543
    return env
6544

    
6545
  def BuildHooksNodes(self):
6546
    """Build hooks nodes.
6547

6548
    """
6549
    nl = [self.cfg.GetMasterNode()]
6550
    nl_post = list(self.instance.all_nodes) + nl
6551
    return (nl, nl_post)
6552

    
6553
  def CheckPrereq(self):
6554
    """Check prerequisites.
6555

6556
    This checks that the instance is in the cluster.
6557

6558
    """
6559
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6560
    assert self.instance is not None, \
6561
      "Cannot retrieve locked instance %s" % self.op.instance_name
6562

    
6563
  def Exec(self, feedback_fn):
6564
    """Remove the instance.
6565

6566
    """
6567
    instance = self.instance
6568
    logging.info("Shutting down instance %s on node %s",
6569
                 instance.name, instance.primary_node)
6570

    
6571
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6572
                                             self.op.shutdown_timeout)
6573
    msg = result.fail_msg
6574
    if msg:
6575
      if self.op.ignore_failures:
6576
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6577
      else:
6578
        raise errors.OpExecError("Could not shutdown instance %s on"
6579
                                 " node %s: %s" %
6580
                                 (instance.name, instance.primary_node, msg))
6581

    
6582
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6583

    
6584

    
6585
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6586
  """Utility function to remove an instance.
6587

6588
  """
6589
  logging.info("Removing block devices for instance %s", instance.name)
6590

    
6591
  if not _RemoveDisks(lu, instance):
6592
    if not ignore_failures:
6593
      raise errors.OpExecError("Can't remove instance's disks")
6594
    feedback_fn("Warning: can't remove instance's disks")
6595

    
6596
  logging.info("Removing instance %s out of cluster config", instance.name)
6597

    
6598
  lu.cfg.RemoveInstance(instance.name)
6599

    
6600
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6601
    "Instance lock removal conflict"
6602

    
6603
  # Remove lock for the instance
6604
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6605

    
6606

    
6607
class LUInstanceQuery(NoHooksLU):
6608
  """Logical unit for querying instances.
6609

6610
  """
6611
  # pylint: disable=W0142
6612
  REQ_BGL = False
6613

    
6614
  def CheckArguments(self):
6615
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6616
                             self.op.output_fields, self.op.use_locking)
6617

    
6618
  def ExpandNames(self):
6619
    self.iq.ExpandNames(self)
6620

    
6621
  def DeclareLocks(self, level):
6622
    self.iq.DeclareLocks(self, level)
6623

    
6624
  def Exec(self, feedback_fn):
6625
    return self.iq.OldStyleQuery(self)
6626

    
6627

    
6628
class LUInstanceFailover(LogicalUnit):
6629
  """Failover an instance.
6630

6631
  """
6632
  HPATH = "instance-failover"
6633
  HTYPE = constants.HTYPE_INSTANCE
6634
  REQ_BGL = False
6635

    
6636
  def CheckArguments(self):
6637
    """Check the arguments.
6638

6639
    """
6640
    self.iallocator = getattr(self.op, "iallocator", None)
6641
    self.target_node = getattr(self.op, "target_node", None)
6642

    
6643
  def ExpandNames(self):
6644
    self._ExpandAndLockInstance()
6645

    
6646
    if self.op.target_node is not None:
6647
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6648

    
6649
    self.needed_locks[locking.LEVEL_NODE] = []
6650
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6651

    
6652
    ignore_consistency = self.op.ignore_consistency
6653
    shutdown_timeout = self.op.shutdown_timeout
6654
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6655
                                       cleanup=False,
6656
                                       failover=True,
6657
                                       ignore_consistency=ignore_consistency,
6658
                                       shutdown_timeout=shutdown_timeout)
6659
    self.tasklets = [self._migrater]
6660

    
6661
  def DeclareLocks(self, level):
6662
    if level == locking.LEVEL_NODE:
6663
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6664
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6665
        if self.op.target_node is None:
6666
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6667
        else:
6668
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6669
                                                   self.op.target_node]
6670
        del self.recalculate_locks[locking.LEVEL_NODE]
6671
      else:
6672
        self._LockInstancesNodes()
6673

    
6674
  def BuildHooksEnv(self):
6675
    """Build hooks env.
6676

6677
    This runs on master, primary and secondary nodes of the instance.
6678

6679
    """
6680
    instance = self._migrater.instance
6681
    source_node = instance.primary_node
6682
    target_node = self.op.target_node
6683
    env = {
6684
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6685
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6686
      "OLD_PRIMARY": source_node,
6687
      "NEW_PRIMARY": target_node,
6688
      }
6689

    
6690
    if instance.disk_template in constants.DTS_INT_MIRROR:
6691
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6692
      env["NEW_SECONDARY"] = source_node
6693
    else:
6694
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6695

    
6696
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6697

    
6698
    return env
6699

    
6700
  def BuildHooksNodes(self):
6701
    """Build hooks nodes.
6702

6703
    """
6704
    instance = self._migrater.instance
6705
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6706
    return (nl, nl + [instance.primary_node])
6707

    
6708

    
6709
class LUInstanceMigrate(LogicalUnit):
6710
  """Migrate an instance.
6711

6712
  This is migration without shutting down, compared to the failover,
6713
  which is done with shutdown.
6714

6715
  """
6716
  HPATH = "instance-migrate"
6717
  HTYPE = constants.HTYPE_INSTANCE
6718
  REQ_BGL = False
6719

    
6720
  def ExpandNames(self):
6721
    self._ExpandAndLockInstance()
6722

    
6723
    if self.op.target_node is not None:
6724
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6725

    
6726
    self.needed_locks[locking.LEVEL_NODE] = []
6727
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6728

    
6729
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6730
                                       cleanup=self.op.cleanup,
6731
                                       failover=False,
6732
                                       fallback=self.op.allow_failover)
6733
    self.tasklets = [self._migrater]
6734

    
6735
  def DeclareLocks(self, level):
6736
    if level == locking.LEVEL_NODE:
6737
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6738
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6739
        if self.op.target_node is None:
6740
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6741
        else:
6742
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6743
                                                   self.op.target_node]
6744
        del self.recalculate_locks[locking.LEVEL_NODE]
6745
      else:
6746
        self._LockInstancesNodes()
6747

    
6748
  def BuildHooksEnv(self):
6749
    """Build hooks env.
6750

6751
    This runs on master, primary and secondary nodes of the instance.
6752

6753
    """
6754
    instance = self._migrater.instance
6755
    source_node = instance.primary_node
6756
    target_node = self.op.target_node
6757
    env = _BuildInstanceHookEnvByObject(self, instance)
6758
    env.update({
6759
      "MIGRATE_LIVE": self._migrater.live,
6760
      "MIGRATE_CLEANUP": self.op.cleanup,
6761
      "OLD_PRIMARY": source_node,
6762
      "NEW_PRIMARY": target_node,
6763
      })
6764

    
6765
    if instance.disk_template in constants.DTS_INT_MIRROR:
6766
      env["OLD_SECONDARY"] = target_node
6767
      env["NEW_SECONDARY"] = source_node
6768
    else:
6769
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6770

    
6771
    return env
6772

    
6773
  def BuildHooksNodes(self):
6774
    """Build hooks nodes.
6775

6776
    """
6777
    instance = self._migrater.instance
6778
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6779
    return (nl, nl + [instance.primary_node])
6780

    
6781

    
6782
class LUInstanceMove(LogicalUnit):
6783
  """Move an instance by data-copying.
6784

6785
  """
6786
  HPATH = "instance-move"
6787
  HTYPE = constants.HTYPE_INSTANCE
6788
  REQ_BGL = False
6789

    
6790
  def ExpandNames(self):
6791
    self._ExpandAndLockInstance()
6792
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6793
    self.op.target_node = target_node
6794
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6795
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6796

    
6797
  def DeclareLocks(self, level):
6798
    if level == locking.LEVEL_NODE:
6799
      self._LockInstancesNodes(primary_only=True)
6800

    
6801
  def BuildHooksEnv(self):
6802
    """Build hooks env.
6803

6804
    This runs on master, primary and secondary nodes of the instance.
6805

6806
    """
6807
    env = {
6808
      "TARGET_NODE": self.op.target_node,
6809
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6810
      }
6811
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6812
    return env
6813

    
6814
  def BuildHooksNodes(self):
6815
    """Build hooks nodes.
6816

6817
    """
6818
    nl = [
6819
      self.cfg.GetMasterNode(),
6820
      self.instance.primary_node,
6821
      self.op.target_node,
6822
      ]
6823
    return (nl, nl)
6824

    
6825
  def CheckPrereq(self):
6826
    """Check prerequisites.
6827

6828
    This checks that the instance is in the cluster.
6829

6830
    """
6831
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6832
    assert self.instance is not None, \
6833
      "Cannot retrieve locked instance %s" % self.op.instance_name
6834

    
6835
    node = self.cfg.GetNodeInfo(self.op.target_node)
6836
    assert node is not None, \
6837
      "Cannot retrieve locked node %s" % self.op.target_node
6838

    
6839
    self.target_node = target_node = node.name
6840

    
6841
    if target_node == instance.primary_node:
6842
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6843
                                 (instance.name, target_node),
6844
                                 errors.ECODE_STATE)
6845

    
6846
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6847

    
6848
    for idx, dsk in enumerate(instance.disks):
6849
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6850
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6851
                                   " cannot copy" % idx, errors.ECODE_STATE)
6852

    
6853
    _CheckNodeOnline(self, target_node)
6854
    _CheckNodeNotDrained(self, target_node)
6855
    _CheckNodeVmCapable(self, target_node)
6856

    
6857
    if instance.admin_up:
6858
      # check memory requirements on the target node
6859
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6860
                           instance.name, bep[constants.BE_MEMORY],
6861
                           instance.hypervisor)
6862
    else:
6863
      self.LogInfo("Not checking memory on the secondary node as"
6864
                   " instance will not be started")
6865

    
6866
    # check bridge existence
6867
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6868

    
6869
  def Exec(self, feedback_fn):
6870
    """Move an instance.
6871

6872
    The move is done by shutting it down on its present node, copying
6873
    the data over (slow) and starting it on the new node.
6874

6875
    """
6876
    instance = self.instance
6877

    
6878
    source_node = instance.primary_node
6879
    target_node = self.target_node
6880

    
6881
    self.LogInfo("Shutting down instance %s on source node %s",
6882
                 instance.name, source_node)
6883

    
6884
    result = self.rpc.call_instance_shutdown(source_node, instance,
6885
                                             self.op.shutdown_timeout)
6886
    msg = result.fail_msg
6887
    if msg:
6888
      if self.op.ignore_consistency:
6889
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6890
                             " Proceeding anyway. Please make sure node"
6891
                             " %s is down. Error details: %s",
6892
                             instance.name, source_node, source_node, msg)
6893
      else:
6894
        raise errors.OpExecError("Could not shutdown instance %s on"
6895
                                 " node %s: %s" %
6896
                                 (instance.name, source_node, msg))
6897

    
6898
    # create the target disks
6899
    try:
6900
      _CreateDisks(self, instance, target_node=target_node)
6901
    except errors.OpExecError:
6902
      self.LogWarning("Device creation failed, reverting...")
6903
      try:
6904
        _RemoveDisks(self, instance, target_node=target_node)
6905
      finally:
6906
        self.cfg.ReleaseDRBDMinors(instance.name)
6907
        raise
6908

    
6909
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6910

    
6911
    errs = []
6912
    # activate, get path, copy the data over
6913
    for idx, disk in enumerate(instance.disks):
6914
      self.LogInfo("Copying data for disk %d", idx)
6915
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6916
                                               instance.name, True, idx)
6917
      if result.fail_msg:
6918
        self.LogWarning("Can't assemble newly created disk %d: %s",
6919
                        idx, result.fail_msg)
6920
        errs.append(result.fail_msg)
6921
        break
6922
      dev_path = result.payload
6923
      result = self.rpc.call_blockdev_export(source_node, disk,
6924
                                             target_node, dev_path,
6925
                                             cluster_name)
6926
      if result.fail_msg:
6927
        self.LogWarning("Can't copy data over for disk %d: %s",
6928
                        idx, result.fail_msg)
6929
        errs.append(result.fail_msg)
6930
        break
6931

    
6932
    if errs:
6933
      self.LogWarning("Some disks failed to copy, aborting")
6934
      try:
6935
        _RemoveDisks(self, instance, target_node=target_node)
6936
      finally:
6937
        self.cfg.ReleaseDRBDMinors(instance.name)
6938
        raise errors.OpExecError("Errors during disk copy: %s" %
6939
                                 (",".join(errs),))
6940

    
6941
    instance.primary_node = target_node
6942
    self.cfg.Update(instance, feedback_fn)
6943

    
6944
    self.LogInfo("Removing the disks on the original node")
6945
    _RemoveDisks(self, instance, target_node=source_node)
6946

    
6947
    # Only start the instance if it's marked as up
6948
    if instance.admin_up:
6949
      self.LogInfo("Starting instance %s on node %s",
6950
                   instance.name, target_node)
6951

    
6952
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6953
                                           ignore_secondaries=True)
6954
      if not disks_ok:
6955
        _ShutdownInstanceDisks(self, instance)
6956
        raise errors.OpExecError("Can't activate the instance's disks")
6957

    
6958
      result = self.rpc.call_instance_start(target_node, instance,
6959
                                            None, None, False)
6960
      msg = result.fail_msg
6961
      if msg:
6962
        _ShutdownInstanceDisks(self, instance)
6963
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6964
                                 (instance.name, target_node, msg))
6965

    
6966

    
6967
class LUNodeMigrate(LogicalUnit):
6968
  """Migrate all instances from a node.
6969

6970
  """
6971
  HPATH = "node-migrate"
6972
  HTYPE = constants.HTYPE_NODE
6973
  REQ_BGL = False
6974

    
6975
  def CheckArguments(self):
6976
    pass
6977

    
6978
  def ExpandNames(self):
6979
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6980

    
6981
    self.share_locks = _ShareAll()
6982
    self.needed_locks = {
6983
      locking.LEVEL_NODE: [self.op.node_name],
6984
      }
6985

    
6986
  def BuildHooksEnv(self):
6987
    """Build hooks env.
6988

6989
    This runs on the master, the primary and all the secondaries.
6990

6991
    """
6992
    return {
6993
      "NODE_NAME": self.op.node_name,
6994
      }
6995

    
6996
  def BuildHooksNodes(self):
6997
    """Build hooks nodes.
6998

6999
    """
7000
    nl = [self.cfg.GetMasterNode()]
7001
    return (nl, nl)
7002

    
7003
  def CheckPrereq(self):
7004
    pass
7005

    
7006
  def Exec(self, feedback_fn):
7007
    # Prepare jobs for migration instances
7008
    jobs = [
7009
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7010
                                 mode=self.op.mode,
7011
                                 live=self.op.live,
7012
                                 iallocator=self.op.iallocator,
7013
                                 target_node=self.op.target_node)]
7014
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7015
      ]
7016

    
7017
    # TODO: Run iallocator in this opcode and pass correct placement options to
7018
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7019
    # running the iallocator and the actual migration, a good consistency model
7020
    # will have to be found.
7021

    
7022
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7023
            frozenset([self.op.node_name]))
7024

    
7025
    return ResultWithJobs(jobs)
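
# Editorial sketch (illustrative only): Exec above returns one single-opcode
# job per primary instance, i.e. the submitted work roughly looks like
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#     ]
#
# packing several opcodes into one inner list would instead run them in
# sequence as a single job.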


class TLMigrateInstance(Tasklet):
7029
  """Tasklet class for instance migration.
7030

7031
  @type live: boolean
7032
  @ivar live: whether the migration will be done live or non-live;
7033
      this variable is initialized only after CheckPrereq has run
7034
  @type cleanup: boolean
7035
  @ivar cleanup: Whether we clean up from a failed migration
7036
  @type iallocator: string
7037
  @ivar iallocator: The iallocator used to determine target_node
7038
  @type target_node: string
7039
  @ivar target_node: If given, the target_node to reallocate the instance to
7040
  @type failover: boolean
7041
  @ivar failover: Whether operation results in failover or migration
7042
  @type fallback: boolean
7043
  @ivar fallback: Whether fallback to failover is allowed if migration not
7044
                  possible
7045
  @type ignore_consistency: boolean
7046
  @ivar ignore_consistency: Wheter we should ignore consistency between source
7047
                            and target node
7048
  @type shutdown_timeout: int
7049
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
7050

7051
  """
7052

    
7053
  # Constants
7054
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7055
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7056

    
7057
  def __init__(self, lu, instance_name, cleanup=False,
7058
               failover=False, fallback=False,
7059
               ignore_consistency=False,
7060
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7061
    """Initializes this class.
7062

7063
    """
7064
    Tasklet.__init__(self, lu)
7065

    
7066
    # Parameters
7067
    self.instance_name = instance_name
7068
    self.cleanup = cleanup
7069
    self.live = False # will be overridden later
7070
    self.failover = failover
7071
    self.fallback = fallback
7072
    self.ignore_consistency = ignore_consistency
7073
    self.shutdown_timeout = shutdown_timeout
7074

    
7075
  def CheckPrereq(self):
7076
    """Check prerequisites.
7077

7078
    This checks that the instance is in the cluster.
7079

7080
    """
7081
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7082
    instance = self.cfg.GetInstanceInfo(instance_name)
7083
    assert instance is not None
7084
    self.instance = instance
7085

    
7086
    if (not self.cleanup and not instance.admin_up and not self.failover and
7087
        self.fallback):
7088
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7089
                      " to failover")
7090
      self.failover = True
7091

    
7092
    if instance.disk_template not in constants.DTS_MIRRORED:
7093
      if self.failover:
7094
        text = "failovers"
7095
      else:
7096
        text = "migrations"
7097
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7098
                                 " %s" % (instance.disk_template, text),
7099
                                 errors.ECODE_STATE)
7100

    
7101
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7102
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7103

    
7104
      if self.lu.op.iallocator:
7105
        self._RunAllocator()
7106
      else:
7107
        # We set self.target_node as it is required by
        # BuildHooksEnv
7109
        self.target_node = self.lu.op.target_node
7110

    
7111
      # self.target_node is already populated, either directly or by the
7112
      # iallocator run
7113
      target_node = self.target_node
7114
      if self.target_node == instance.primary_node:
7115
        raise errors.OpPrereqError("Cannot migrate instance %s"
7116
                                   " to its primary (%s)" %
7117
                                   (instance.name, instance.primary_node))
7118

    
7119
      if len(self.lu.tasklets) == 1:
7120
        # It is safe to release locks only when we're the only tasklet
7121
        # in the LU
7122
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7123
                      keep=[instance.primary_node, self.target_node])
7124

    
7125
    else:
7126
      secondary_nodes = instance.secondary_nodes
7127
      if not secondary_nodes:
7128
        raise errors.ConfigurationError("No secondary node but using"
7129
                                        " %s disk template" %
7130
                                        instance.disk_template)
7131
      target_node = secondary_nodes[0]
7132
      if self.lu.op.iallocator or (self.lu.op.target_node and
7133
                                   self.lu.op.target_node != target_node):
7134
        if self.failover:
7135
          text = "failed over"
7136
        else:
7137
          text = "migrated"
7138
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7139
                                   " be %s to arbitrary nodes"
7140
                                   " (neither an iallocator nor a target"
7141
                                   " node can be passed)" %
7142
                                   (instance.disk_template, text),
7143
                                   errors.ECODE_INVAL)
7144

    
7145
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7146

    
7147
    # check memory requirements on the secondary node
7148
    if not self.failover or instance.admin_up:
7149
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7150
                           instance.name, i_be[constants.BE_MEMORY],
7151
                           instance.hypervisor)
7152
    else:
7153
      self.lu.LogInfo("Not checking memory on the secondary node as"
7154
                      " instance will not be started")
7155

    
7156
    # check bridge existence
7157
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7158

    
7159
    if not self.cleanup:
7160
      _CheckNodeNotDrained(self.lu, target_node)
7161
      if not self.failover:
7162
        result = self.rpc.call_instance_migratable(instance.primary_node,
7163
                                                   instance)
7164
        if result.fail_msg and self.fallback:
7165
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7166
                          " failover")
7167
          self.failover = True
7168
        else:
7169
          result.Raise("Can't migrate, please use failover",
7170
                       prereq=True, ecode=errors.ECODE_STATE)
7171

    
7172
    assert not (self.failover and self.cleanup)
7173

    
7174
    if not self.failover:
7175
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7176
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7177
                                   " parameters are accepted",
7178
                                   errors.ECODE_INVAL)
7179
      if self.lu.op.live is not None:
7180
        if self.lu.op.live:
7181
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7182
        else:
7183
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7184
        # reset the 'live' parameter to None so that repeated
7185
        # invocations of CheckPrereq do not raise an exception
7186
        self.lu.op.live = None
7187
      elif self.lu.op.mode is None:
7188
        # read the default value from the hypervisor
7189
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7190
                                                skip_globals=False)
7191
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7192

    
7193
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7194
    else:
7195
      # Failover is never live
7196
      self.live = False
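    # Illustrative example of the normalization above (opcode values
    # assumed): a request arriving with op.live=False leaves this method
    # with op.mode=constants.HT_MIGRATION_NONLIVE, op.live=None and
    # self.live=False, so repeated CheckPrereq runs stay idempotent.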
7197

    
7198
  def _RunAllocator(self):
7199
    """Run the allocator based on input opcode.
7200

7201
    """
7202
    ial = IAllocator(self.cfg, self.rpc,
7203
                     mode=constants.IALLOCATOR_MODE_RELOC,
7204
                     name=self.instance_name,
7205
                     # TODO See why hail breaks with a single node below
7206
                     relocate_from=[self.instance.primary_node,
7207
                                    self.instance.primary_node],
7208
                     )
7209

    
7210
    ial.Run(self.lu.op.iallocator)
7211

    
7212
    if not ial.success:
7213
      raise errors.OpPrereqError("Can't compute nodes using"
7214
                                 " iallocator '%s': %s" %
7215
                                 (self.lu.op.iallocator, ial.info),
7216
                                 errors.ECODE_NORES)
7217
    if len(ial.result) != ial.required_nodes:
7218
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7219
                                 " of nodes (%s), required %s" %
7220
                                 (self.lu.op.iallocator, len(ial.result),
7221
                                  ial.required_nodes), errors.ECODE_FAULT)
7222
    self.target_node = ial.result[0]
7223
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7224
                 self.instance_name, self.lu.op.iallocator,
7225
                 utils.CommaJoin(ial.result))
7226

    
7227
  def _WaitUntilSync(self):
7228
    """Poll with custom rpc for disk sync.
7229

7230
    This uses our own step-based rpc call.
7231

7232
    """
7233
    self.feedback_fn("* wait until resync is done")
7234
    all_done = False
7235
    while not all_done:
7236
      all_done = True
7237
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7238
                                            self.nodes_ip,
7239
                                            self.instance.disks)
7240
      min_percent = 100
7241
      for node, nres in result.items():
7242
        nres.Raise("Cannot resync disks on node %s" % node)
7243
        node_done, node_percent = nres.payload
7244
        all_done = all_done and node_done
7245
        if node_percent is not None:
7246
          min_percent = min(min_percent, node_percent)
7247
      if not all_done:
7248
        if min_percent < 100:
7249
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7250
        time.sleep(2)
7251

    
7252
  def _EnsureSecondary(self, node):
7253
    """Demote a node to secondary.
7254

7255
    """
7256
    self.feedback_fn("* switching node %s to secondary mode" % node)
7257

    
7258
    for dev in self.instance.disks:
7259
      self.cfg.SetDiskID(dev, node)
7260

    
7261
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7262
                                          self.instance.disks)
7263
    result.Raise("Cannot change disk to secondary on node %s" % node)
7264

    
7265
  def _GoStandalone(self):
7266
    """Disconnect from the network.
7267

7268
    """
7269
    self.feedback_fn("* changing into standalone mode")
7270
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7271
                                               self.instance.disks)
7272
    for node, nres in result.items():
7273
      nres.Raise("Cannot disconnect disks node %s" % node)
7274

    
7275
  def _GoReconnect(self, multimaster):
7276
    """Reconnect to the network.
7277

7278
    """
7279
    if multimaster:
7280
      msg = "dual-master"
7281
    else:
7282
      msg = "single-master"
7283
    self.feedback_fn("* changing disks into %s mode" % msg)
7284
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7285
                                           self.instance.disks,
7286
                                           self.instance.name, multimaster)
7287
    for node, nres in result.items():
7288
      nres.Raise("Cannot change disks config on node %s" % node)
7289

    
7290
  def _ExecCleanup(self):
7291
    """Try to cleanup after a failed migration.
7292

7293
    The cleanup is done by:
7294
      - check that the instance is running only on one node
7295
        (and update the config if needed)
7296
      - change disks on its secondary node to secondary
7297
      - wait until disks are fully synchronized
7298
      - disconnect from the network
7299
      - change disks into single-master mode
7300
      - wait again until disks are fully synchronized
7301

7302
    """
7303
    instance = self.instance
7304
    target_node = self.target_node
7305
    source_node = self.source_node
7306

    
7307
    # check running on only one node
7308
    self.feedback_fn("* checking where the instance actually runs"
7309
                     " (if this hangs, the hypervisor might be in"
7310
                     " a bad state)")
7311
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7312
    for node, result in ins_l.items():
7313
      result.Raise("Can't contact node %s" % node)
7314

    
7315
    runningon_source = instance.name in ins_l[source_node].payload
7316
    runningon_target = instance.name in ins_l[target_node].payload
7317

    
7318
    if runningon_source and runningon_target:
7319
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7320
                               " or the hypervisor is confused; you will have"
7321
                               " to ensure manually that it runs only on one"
7322
                               " and restart this operation")
7323

    
7324
    if not (runningon_source or runningon_target):
7325
      raise errors.OpExecError("Instance does not seem to be running at all;"
7326
                               " in this case it's safer to repair by"
7327
                               " running 'gnt-instance stop' to ensure disk"
7328
                               " shutdown, and then restarting it")
7329

    
7330
    if runningon_target:
7331
      # the migration has actually succeeded, we need to update the config
7332
      self.feedback_fn("* instance running on secondary node (%s),"
7333
                       " updating config" % target_node)
7334
      instance.primary_node = target_node
7335
      self.cfg.Update(instance, self.feedback_fn)
7336
      demoted_node = source_node
7337
    else:
7338
      self.feedback_fn("* instance confirmed to be running on its"
7339
                       " primary node (%s)" % source_node)
7340
      demoted_node = target_node
7341

    
7342
    if instance.disk_template in constants.DTS_INT_MIRROR:
7343
      self._EnsureSecondary(demoted_node)
7344
      try:
7345
        self._WaitUntilSync()
7346
      except errors.OpExecError:
7347
        # we ignore here errors, since if the device is standalone, it
7348
        # won't be able to sync
7349
        pass
7350
      self._GoStandalone()
7351
      self._GoReconnect(False)
7352
      self._WaitUntilSync()
7353

    
7354
    self.feedback_fn("* done")
7355

    
7356
  def _RevertDiskStatus(self):
7357
    """Try to revert the disk status after a failed migration.
7358

7359
    """
7360
    target_node = self.target_node
7361
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7362
      return
7363

    
7364
    try:
7365
      self._EnsureSecondary(target_node)
7366
      self._GoStandalone()
7367
      self._GoReconnect(False)
7368
      self._WaitUntilSync()
7369
    except errors.OpExecError, err:
7370
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7371
                         " please try to recover the instance manually;"
7372
                         " error '%s'" % str(err))
7373

    
7374
  def _AbortMigration(self):
7375
    """Call the hypervisor code to abort a started migration.
7376

7377
    """
7378
    instance = self.instance
7379
    target_node = self.target_node
7380
    source_node = self.source_node
7381
    migration_info = self.migration_info
7382

    
7383
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7384
                                                                 instance,
7385
                                                                 migration_info,
7386
                                                                 False)
7387
    abort_msg = abort_result.fail_msg
7388
    if abort_msg:
7389
      logging.error("Aborting migration failed on target node %s: %s",
7390
                    target_node, abort_msg)
7391
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7393

    
7394
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7395
        instance, False, self.live)
7396
    abort_msg = abort_result.fail_msg
7397
    if abort_msg:
7398
      logging.error("Aborting migration failed on source node %s: %s",
7399
                    source_node, abort_msg)
7400

    
7401
  def _ExecMigration(self):
7402
    """Migrate an instance.
7403

7404
    The migrate is done by:
7405
      - change the disks into dual-master mode
7406
      - wait until disks are fully synchronized again
7407
      - migrate the instance
7408
      - change disks on the new secondary node (the old primary) to secondary
7409
      - wait until disks are fully synchronized
7410
      - change disks into single-master mode
7411

7412
    """
7413
    instance = self.instance
7414
    target_node = self.target_node
7415
    source_node = self.source_node
7416

    
7417
    self.feedback_fn("* checking disk consistency between source and target")
7418
    for dev in instance.disks:
7419
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7420
        raise errors.OpExecError("Disk %s is degraded or not fully"
7421
                                 " synchronized on target node,"
7422
                                 " aborting migration" % dev.iv_name)
7423

    
7424
    # First get the migration information from the remote node
7425
    result = self.rpc.call_migration_info(source_node, instance)
7426
    msg = result.fail_msg
7427
    if msg:
7428
      log_err = ("Failed fetching source migration information from %s: %s" %
7429
                 (source_node, msg))
7430
      logging.error(log_err)
7431
      raise errors.OpExecError(log_err)
7432

    
7433
    self.migration_info = migration_info = result.payload
7434

    
7435
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7436
      # Then switch the disks to master/master mode
7437
      self._EnsureSecondary(target_node)
7438
      self._GoStandalone()
7439
      self._GoReconnect(True)
7440
      self._WaitUntilSync()
7441

    
7442
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7443
    result = self.rpc.call_accept_instance(target_node,
7444
                                           instance,
7445
                                           migration_info,
7446
                                           self.nodes_ip[target_node])
7447

    
7448
    msg = result.fail_msg
7449
    if msg:
7450
      logging.error("Instance pre-migration failed, trying to revert"
7451
                    " disk status: %s", msg)
7452
      self.feedback_fn("Pre-migration failed, aborting")
7453
      self._AbortMigration()
7454
      self._RevertDiskStatus()
7455
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7456
                               (instance.name, msg))
7457

    
7458
    self.feedback_fn("* migrating instance to %s" % target_node)
7459
    result = self.rpc.call_instance_migrate(source_node, instance,
7460
                                            self.nodes_ip[target_node],
7461
                                            self.live)
7462
    msg = result.fail_msg
7463
    if msg:
7464
      logging.error("Instance migration failed, trying to revert"
7465
                    " disk status: %s", msg)
7466
      self.feedback_fn("Migration failed, aborting")
7467
      self._AbortMigration()
7468
      self._RevertDiskStatus()
7469
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7470
                               (instance.name, msg))
7471

    
7472
    self.feedback_fn("* starting memory transfer")
7473
    last_feedback = time.time()
7474
    while True:
7475
      result = self.rpc.call_instance_get_migration_status(source_node,
7476
                                                           instance)
7477
      msg = result.fail_msg
7478
      ms = result.payload   # MigrationStatus instance
7479
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7480
        logging.error("Instance migration failed, trying to revert"
7481
                      " disk status: %s", msg)
7482
        self.feedback_fn("Migration failed, aborting")
7483
        self._AbortMigration()
7484
        self._RevertDiskStatus()
7485
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7486
                                 (instance.name, msg))
7487

    
7488
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7489
        self.feedback_fn("* memory transfer complete")
7490
        break
7491

    
7492
      if (utils.TimeoutExpired(last_feedback,
7493
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7494
          ms.transferred_ram is not None):
7495
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7496
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7497
        last_feedback = time.time()
7498

    
7499
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7500

    
7501
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7502
                                                           instance,
7503
                                                           True,
7504
                                                           self.live)
7505
    msg = result.fail_msg
7506
    if msg:
7507
      logging.error("Instance migration succeeded, but finalization failed"
7508
                    " on the source node: %s", msg)
7509
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7510
                               msg)
7511

    
7512
    instance.primary_node = target_node
7513

    
7514
    # distribute new instance config to the other nodes
7515
    self.cfg.Update(instance, self.feedback_fn)
7516

    
7517
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7518
                                                           instance,
7519
                                                           migration_info,
7520
                                                           True)
7521
    msg = result.fail_msg
7522
    if msg:
7523
      logging.error("Instance migration succeeded, but finalization failed"
7524
                    " on the target node: %s", msg)
7525
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7526
                               msg)
7527

    
7528
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7529
      self._EnsureSecondary(source_node)
7530
      self._WaitUntilSync()
7531
      self._GoStandalone()
7532
      self._GoReconnect(False)
7533
      self._WaitUntilSync()
7534

    
7535
    self.feedback_fn("* done")
7536

    
7537
  def _ExecFailover(self):
7538
    """Failover an instance.
7539

7540
    The failover is done by shutting it down on its present node and
7541
    starting it on the secondary.
7542

7543
    """
7544
    instance = self.instance
7545
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7546

    
7547
    source_node = instance.primary_node
7548
    target_node = self.target_node
7549

    
7550
    if instance.admin_up:
7551
      self.feedback_fn("* checking disk consistency between source and target")
7552
      for dev in instance.disks:
7553
        # for drbd, these are drbd over lvm
7554
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7555
          if primary_node.offline:
7556
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7557
                             " target node %s" %
7558
                             (primary_node.name, dev.iv_name, target_node))
7559
          elif not self.ignore_consistency:
7560
            raise errors.OpExecError("Disk %s is degraded on target node,"
7561
                                     " aborting failover" % dev.iv_name)
7562
    else:
7563
      self.feedback_fn("* not checking disk consistency as instance is not"
7564
                       " running")
7565

    
7566
    self.feedback_fn("* shutting down instance on source node")
7567
    logging.info("Shutting down instance %s on node %s",
7568
                 instance.name, source_node)
7569

    
7570
    result = self.rpc.call_instance_shutdown(source_node, instance,
7571
                                             self.shutdown_timeout)
7572
    msg = result.fail_msg
7573
    if msg:
7574
      if self.ignore_consistency or primary_node.offline:
7575
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7576
                           " proceeding anyway; please make sure node"
7577
                           " %s is down; error details: %s",
7578
                           instance.name, source_node, source_node, msg)
7579
      else:
7580
        raise errors.OpExecError("Could not shutdown instance %s on"
7581
                                 " node %s: %s" %
7582
                                 (instance.name, source_node, msg))
7583

    
7584
    self.feedback_fn("* deactivating the instance's disks on source node")
7585
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7586
      raise errors.OpExecError("Can't shut down the instance's disks")
7587

    
7588
    instance.primary_node = target_node
7589
    # distribute new instance config to the other nodes
7590
    self.cfg.Update(instance, self.feedback_fn)
7591

    
7592
    # Only start the instance if it's marked as up
7593
    if instance.admin_up:
7594
      self.feedback_fn("* activating the instance's disks on target node %s" %
7595
                       target_node)
7596
      logging.info("Starting instance %s on node %s",
7597
                   instance.name, target_node)
7598

    
7599
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7600
                                           ignore_secondaries=True)
7601
      if not disks_ok:
7602
        _ShutdownInstanceDisks(self.lu, instance)
7603
        raise errors.OpExecError("Can't activate the instance's disks")
7604

    
7605
      self.feedback_fn("* starting the instance on the target node %s" %
7606
                       target_node)
7607
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7608
                                            False)
7609
      msg = result.fail_msg
7610
      if msg:
7611
        _ShutdownInstanceDisks(self.lu, instance)
7612
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7613
                                 (instance.name, target_node, msg))
7614

    
7615
  def Exec(self, feedback_fn):
7616
    """Perform the migration.
7617

7618
    """
7619
    self.feedback_fn = feedback_fn
7620
    self.source_node = self.instance.primary_node
7621

    
7622
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7623
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7624
      self.target_node = self.instance.secondary_nodes[0]
7625
      # Otherwise self.target_node has been populated either
7626
      # directly, or through an iallocator.
7627

    
7628
    self.all_nodes = [self.source_node, self.target_node]
7629
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7630
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7631

    
7632
    if self.failover:
7633
      feedback_fn("Failover instance %s" % self.instance.name)
7634
      self._ExecFailover()
7635
    else:
7636
      feedback_fn("Migrating instance %s" % self.instance.name)
7637

    
7638
      if self.cleanup:
7639
        return self._ExecCleanup()
7640
      else:
7641
        return self._ExecMigration()
7642

    
7643

    
7644
def _CreateBlockDev(lu, node, instance, device, force_create,
7645
                    info, force_open):
7646
  """Create a tree of block devices on a given node.
7647

7648
  If this device type has to be created on secondaries, create it and
7649
  all its children.
7650

7651
  If not, just recurse to children keeping the same 'force' value.
7652

7653
  @param lu: the lu on whose behalf we execute
7654
  @param node: the node on which to create the device
7655
  @type instance: L{objects.Instance}
7656
  @param instance: the instance which owns the device
7657
  @type device: L{objects.Disk}
7658
  @param device: the device to create
7659
  @type force_create: boolean
7660
  @param force_create: whether to force creation of this device; this
7661
      will be change to True whenever we find a device which has
7662
      CreateOnSecondary() attribute
7663
  @param info: the extra 'metadata' we should attach to the device
7664
      (this will be represented as a LVM tag)
7665
  @type force_open: boolean
7666
  @param force_open: this parameter will be passes to the
7667
      L{backend.BlockdevCreate} function where it specifies
7668
      whether we run on primary or not, and it affects both
7669
      the child assembly and the device own Open() execution
7670

7671
  """
7672
  if device.CreateOnSecondary():
7673
    force_create = True
7674

    
7675
  if device.children:
7676
    for child in device.children:
7677
      _CreateBlockDev(lu, node, instance, child, force_create,
7678
                      info, force_open)
7679

    
7680
  if not force_create:
7681
    return
7682

    
7683
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
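  # Call-tree sketch (illustrative; variable names assumed): for a disk with
  # children, e.g. a DRBD8 device over two LVs, the recursion above creates
  # the children first and the parent device last:
  #
  #   _CreateBlockDev(lu, node, instance, drbd_dev, False, info, force_open)
  #     -> _CreateBlockDev(..., data_lv, ...)   # child LV
  #     -> _CreateBlockDev(..., meta_lv, ...)   # child LV
  #     -> _CreateSingleBlockDev(lu, node, instance, drbd_dev, info,
  #                              force_open)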
7684

    
7685

    
7686
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7687
  """Create a single block device on a given node.
7688

7689
  This will not recurse over children of the device, so they must be
7690
  created in advance.
7691

7692
  @param lu: the lu on whose behalf we execute
7693
  @param node: the node on which to create the device
7694
  @type instance: L{objects.Instance}
7695
  @param instance: the instance which owns the device
7696
  @type device: L{objects.Disk}
7697
  @param device: the device to create
7698
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7705

7706
  """
7707
  lu.cfg.SetDiskID(device, node)
7708
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7709
                                       instance.name, force_open, info)
7710
  result.Raise("Can't create block device %s on"
7711
               " node %s for instance %s" % (device, node, instance.name))
7712
  if device.physical_id is None:
7713
    device.physical_id = result.payload
7714

    
7715

    
7716
def _GenerateUniqueNames(lu, exts):
7717
  """Generate a suitable LV name.
7718

7719
  This will generate a logical volume name for the given instance.
7720

7721
  """
7722
  results = []
7723
  for val in exts:
7724
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7725
    results.append("%s%s" % (new_id, val))
7726
  return results
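# Example (illustrative): _GenerateUniqueNames(lu, [".disk0", ".disk1"])
# returns something like ["<uuid0>.disk0", "<uuid1>.disk1"], i.e. one fresh
# unique ID per requested extension.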
7727

    
7728

    
7729
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7730
                         iv_name, p_minor, s_minor):
7731
  """Generate a drbd8 device complete with its children.
7732

7733
  """
7734
  assert len(vgnames) == len(names) == 2
7735
  port = lu.cfg.AllocatePort()
7736
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7737
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7738
                          logical_id=(vgnames[0], names[0]))
7739
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7740
                          logical_id=(vgnames[1], names[1]))
7741
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7742
                          logical_id=(primary, secondary, port,
7743
                                      p_minor, s_minor,
7744
                                      shared_secret),
7745
                          children=[dev_data, dev_meta],
7746
                          iv_name=iv_name)
7747
  return drbd_dev
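# Resulting layout (illustrative; VG name and sizes assumed): for a 10240 MB
# disk the returned tree is a DRBD8 device with an LV pair underneath, e.g.
#
#   LD_DRBD8  size=10240  iv_name="disk/0"
#     +- LD_LV  size=10240  logical_id=("xenvg", "<uuid>.disk0_data")
#     +- LD_LV  size=128    logical_id=("xenvg", "<uuid>.disk0_meta")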
7748

    
7749

    
7750
def _GenerateDiskTemplate(lu, template_name,
7751
                          instance_name, primary_node,
7752
                          secondary_nodes, disk_info,
7753
                          file_storage_dir, file_driver,
7754
                          base_index, feedback_fn):
7755
  """Generate the entire disk layout for a given template type.
7756

7757
  """
7758
  #TODO: compute space requirements
7759

    
7760
  vgname = lu.cfg.GetVGName()
7761
  disk_count = len(disk_info)
7762
  disks = []
7763
  if template_name == constants.DT_DISKLESS:
7764
    pass
7765
  elif template_name == constants.DT_PLAIN:
7766
    if len(secondary_nodes) != 0:
7767
      raise errors.ProgrammerError("Wrong template configuration")
7768

    
7769
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7770
                                      for i in range(disk_count)])
7771
    for idx, disk in enumerate(disk_info):
7772
      disk_index = idx + base_index
7773
      vg = disk.get(constants.IDISK_VG, vgname)
7774
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7775
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7776
                              size=disk[constants.IDISK_SIZE],
7777
                              logical_id=(vg, names[idx]),
7778
                              iv_name="disk/%d" % disk_index,
7779
                              mode=disk[constants.IDISK_MODE])
7780
      disks.append(disk_dev)
7781
  elif template_name == constants.DT_DRBD8:
7782
    if len(secondary_nodes) != 1:
7783
      raise errors.ProgrammerError("Wrong template configuration")
7784
    remote_node = secondary_nodes[0]
7785
    minors = lu.cfg.AllocateDRBDMinor(
7786
      [primary_node, remote_node] * len(disk_info), instance_name)
7787

    
7788
    names = []
7789
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7790
                                               for i in range(disk_count)]):
7791
      names.append(lv_prefix + "_data")
7792
      names.append(lv_prefix + "_meta")
7793
    for idx, disk in enumerate(disk_info):
7794
      disk_index = idx + base_index
7795
      data_vg = disk.get(constants.IDISK_VG, vgname)
7796
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7797
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7798
                                      disk[constants.IDISK_SIZE],
7799
                                      [data_vg, meta_vg],
7800
                                      names[idx * 2:idx * 2 + 2],
7801
                                      "disk/%d" % disk_index,
7802
                                      minors[idx * 2], minors[idx * 2 + 1])
7803
      disk_dev.mode = disk[constants.IDISK_MODE]
7804
      disks.append(disk_dev)
7805
  elif template_name == constants.DT_FILE:
7806
    if len(secondary_nodes) != 0:
7807
      raise errors.ProgrammerError("Wrong template configuration")
7808

    
7809
    opcodes.RequireFileStorage()
7810

    
7811
    for idx, disk in enumerate(disk_info):
7812
      disk_index = idx + base_index
7813
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7814
                              size=disk[constants.IDISK_SIZE],
7815
                              iv_name="disk/%d" % disk_index,
7816
                              logical_id=(file_driver,
7817
                                          "%s/disk%d" % (file_storage_dir,
7818
                                                         disk_index)),
7819
                              mode=disk[constants.IDISK_MODE])
7820
      disks.append(disk_dev)
7821
  elif template_name == constants.DT_SHARED_FILE:
7822
    if len(secondary_nodes) != 0:
7823
      raise errors.ProgrammerError("Wrong template configuration")
7824

    
7825
    opcodes.RequireSharedFileStorage()
7826

    
7827
    for idx, disk in enumerate(disk_info):
7828
      disk_index = idx + base_index
7829
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7830
                              size=disk[constants.IDISK_SIZE],
7831
                              iv_name="disk/%d" % disk_index,
7832
                              logical_id=(file_driver,
7833
                                          "%s/disk%d" % (file_storage_dir,
7834
                                                         disk_index)),
7835
                              mode=disk[constants.IDISK_MODE])
7836
      disks.append(disk_dev)
7837
  elif template_name == constants.DT_BLOCK:
7838
    if len(secondary_nodes) != 0:
7839
      raise errors.ProgrammerError("Wrong template configuration")
7840

    
7841
    for idx, disk in enumerate(disk_info):
7842
      disk_index = idx + base_index
7843
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7844
                              size=disk[constants.IDISK_SIZE],
7845
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7846
                                          disk[constants.IDISK_ADOPT]),
7847
                              iv_name="disk/%d" % disk_index,
7848
                              mode=disk[constants.IDISK_MODE])
7849
      disks.append(disk_dev)
7850

    
7851
  else:
7852
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7853
  return disks
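# Example (illustrative, arguments assumed): a single 512 MB DT_PLAIN disk
# yields one LV-backed Disk object and involves no secondary nodes:
#
#   _GenerateDiskTemplate(lu, constants.DT_PLAIN, "inst1.example.com",
#                         "node1.example.com", [],
#                         [{constants.IDISK_SIZE: 512,
#                           constants.IDISK_MODE: constants.DISK_RDWR}],
#                         None, None, 0, feedback_fn)
#   => [Disk(dev_type=LD_LV, size=512, iv_name="disk/0", ...)]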
7854

    
7855

    
7856
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7861

    
7862

    
7863
def _CalcEta(time_taken, written, total_size):
7864
  """Calculates the ETA based on size written and total size.
7865

7866
  @param time_taken: The time taken so far
7867
  @param written: amount written so far
7868
  @param total_size: The total size of data to be written
7869
  @return: The remaining time in seconds
7870

7871
  """
7872
  avg_time = time_taken / float(written)
7873
  return (total_size - written) * avg_time
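# Worked example (values assumed): if 512 MB out of 2048 MB were written in
# 64 seconds, the average is 0.125 s/MB, so
#
#   _CalcEta(64.0, 512, 2048) == (2048 - 512) * (64.0 / 512) == 192.0
#
# i.e. roughly 192 seconds remain.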
7874

    
7875

    
7876
def _WipeDisks(lu, instance):
7877
  """Wipes instance disks.
7878

7879
  @type lu: L{LogicalUnit}
7880
  @param lu: the logical unit on whose behalf we execute
7881
  @type instance: L{objects.Instance}
7882
  @param instance: the instance whose disks we should wipe
7883
  @return: the success of the wipe
7884

7885
  """
7886
  node = instance.primary_node
7887

    
7888
  for device in instance.disks:
7889
    lu.cfg.SetDiskID(device, node)
7890

    
7891
  logging.info("Pause sync of instance %s disks", instance.name)
7892
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7893

    
7894
  for idx, success in enumerate(result.payload):
7895
    if not success:
7896
      logging.warn("pause-sync of instance %s for disks %d failed",
7897
                   instance.name, idx)
7898

    
7899
  try:
7900
    for idx, device in enumerate(instance.disks):
7901
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7902
      # MAX_WIPE_CHUNK at max
7903
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7904
                            constants.MIN_WIPE_CHUNK_PERCENT)
7905
      # we _must_ make this an int, otherwise rounding errors will
7906
      # occur
7907
      wipe_chunk_size = int(wipe_chunk_size)
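      # Worked example (assuming MIN_WIPE_CHUNK_PERCENT=10 and
      # MAX_WIPE_CHUNK=1024): a 2048 MB disk is wiped in
      # min(1024, 204.8) -> 204 MB chunks, while a 100 GiB disk is capped
      # at 1024 MB chunks.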
7908

    
7909
      lu.LogInfo("* Wiping disk %d", idx)
7910
      logging.info("Wiping disk %d for instance %s, node %s using"
7911
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7912

    
7913
      offset = 0
7914
      size = device.size
7915
      last_output = 0
7916
      start_time = time.time()
7917

    
7918
      while offset < size:
7919
        wipe_size = min(wipe_chunk_size, size - offset)
7920
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7921
                      idx, offset, wipe_size)
7922
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7923
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7924
                     (idx, offset, wipe_size))
7925
        now = time.time()
7926
        offset += wipe_size
7927
        if now - last_output >= 60:
7928
          eta = _CalcEta(now - start_time, offset, size)
7929
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7930
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7931
          last_output = now
7932
  finally:
7933
    logging.info("Resume sync of instance %s disks", instance.name)
7934

    
7935
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7936

    
7937
    for idx, success in enumerate(result.payload):
7938
      if not success:
7939
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7940
                      " look at the status and troubleshoot the issue", idx)
7941
        logging.warn("resume-sync of instance %s for disks %d failed",
7942
                     instance.name, idx)
7943

    
7944

    
7945
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7946
  """Create all disks for an instance.
7947

7948
  This abstracts away some work from AddInstance.
7949

7950
  @type lu: L{LogicalUnit}
7951
  @param lu: the logical unit on whose behalf we execute
7952
  @type instance: L{objects.Instance}
7953
  @param instance: the instance whose disks we should create
7954
  @type to_skip: list
7955
  @param to_skip: list of indices to skip
7956
  @type target_node: string
7957
  @param target_node: if passed, overrides the target node for creation
7958
  @rtype: boolean
7959
  @return: the success of the creation
7960

7961
  """
7962
  info = _GetInstanceInfoText(instance)
7963
  if target_node is None:
7964
    pnode = instance.primary_node
7965
    all_nodes = instance.all_nodes
7966
  else:
7967
    pnode = target_node
7968
    all_nodes = [pnode]
7969

    
7970
  if instance.disk_template in constants.DTS_FILEBASED:
7971
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7972
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7973

    
7974
    result.Raise("Failed to create directory '%s' on"
7975
                 " node %s" % (file_storage_dir, pnode))
7976

    
7977
  # Note: this needs to be kept in sync with adding of disks in
7978
  # LUInstanceSetParams
7979
  for idx, device in enumerate(instance.disks):
7980
    if to_skip and idx in to_skip:
7981
      continue
7982
    logging.info("Creating volume %s for instance %s",
7983
                 device.iv_name, instance.name)
7984
    #HARDCODE
7985
    for node in all_nodes:
7986
      f_create = node == pnode
7987
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7988

    
7989

    
7990
def _RemoveDisks(lu, instance, target_node=None):
7991
  """Remove all disks for an instance.
7992

7993
  This abstracts away some work from `AddInstance()` and
7994
  `RemoveInstance()`. Note that in case some of the devices couldn't
7995
  be removed, the removal will continue with the other ones (compare
7996
  with `_CreateDisks()`).
7997

7998
  @type lu: L{LogicalUnit}
7999
  @param lu: the logical unit on whose behalf we execute
8000
  @type instance: L{objects.Instance}
8001
  @param instance: the instance whose disks we should remove
8002
  @type target_node: string
8003
  @param target_node: used to override the node on which to remove the disks
8004
  @rtype: boolean
8005
  @return: the success of the removal
8006

8007
  """
8008
  logging.info("Removing block devices for instance %s", instance.name)
8009

    
8010
  all_result = True
8011
  for device in instance.disks:
8012
    if target_node:
8013
      edata = [(target_node, device)]
8014
    else:
8015
      edata = device.ComputeNodeTree(instance.primary_node)
8016
    for node, disk in edata:
8017
      lu.cfg.SetDiskID(disk, node)
8018
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8019
      if msg:
8020
        lu.LogWarning("Could not remove block device %s on node %s,"
8021
                      " continuing anyway: %s", device.iv_name, node, msg)
8022
        all_result = False
8023

    
8024
  if instance.disk_template == constants.DT_FILE:
8025
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8026
    if target_node:
8027
      tgt = target_node
8028
    else:
8029
      tgt = instance.primary_node
8030
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8031
    if result.fail_msg:
8032
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8033
                    file_storage_dir, instance.primary_node, result.fail_msg)
8034
      all_result = False
8035

    
8036
  return all_result
8037

    
8038

    
8039
def _ComputeDiskSizePerVG(disk_template, disks):
8040
  """Compute disk size requirements in the volume group
8041

8042
  """
8043
  def _compute(disks, payload):
8044
    """Universal algorithm.
8045

8046
    """
8047
    vgs = {}
8048
    for disk in disks:
8049
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8051

    
8052
    return vgs
8053

    
8054
  # Required free disk space as a function of disk and swap space
8055
  req_size_dict = {
8056
    constants.DT_DISKLESS: {},
8057
    constants.DT_PLAIN: _compute(disks, 0),
8058
    # 128 MB are added for drbd metadata for each disk
8059
    constants.DT_DRBD8: _compute(disks, 128),
8060
    constants.DT_FILE: {},
8061
    constants.DT_SHARED_FILE: {},
8062
  }
8063

    
8064
  if disk_template not in req_size_dict:
8065
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8066
                                 " is unknown" % disk_template)
8067

    
8068
  return req_size_dict[disk_template]
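# Example (illustrative input; VG names assumed): two 1024 MB DRBD8 disks on
# different volume groups each need their size plus 128 MB of metadata in
# their own VG:
#
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_VG: "xenvg",
#                           constants.IDISK_SIZE: 1024},
#                          {constants.IDISK_VG: "fastvg",
#                           constants.IDISK_SIZE: 1024}])
#   => {"xenvg": 1152, "fastvg": 1152}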
8069

    
8070

    
8071
def _ComputeDiskSize(disk_template, disks):
8072
  """Compute disk size requirements in the volume group
8073

8074
  """
8075
  # Required free disk space as a function of disk and swap space
8076
  req_size_dict = {
8077
    constants.DT_DISKLESS: None,
8078
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8079
    # 128 MB are added for drbd metadata for each disk
8080
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8081
    constants.DT_FILE: None,
8082
    constants.DT_SHARED_FILE: 0,
8083
    constants.DT_BLOCK: 0,
8084
  }
8085

    
8086
  if disk_template not in req_size_dict:
8087
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8088
                                 " is unknown" % disk_template)
8089

    
8090
  return req_size_dict[disk_template]
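# Example (illustrative): two 1024 MB disks under DT_DRBD8 require
# 2 * (1024 + 128) = 2304 MB in total, DT_PLAIN requires 2048 MB, and the
# size-agnostic templates (file, shared-file, block, diskless) return 0 or
# None as listed above.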
8091

    
8092

    
8093
def _FilterVmNodes(lu, nodenames):
8094
  """Filters out non-vm_capable nodes from a list.
8095

8096
  @type lu: L{LogicalUnit}
8097
  @param lu: the logical unit for which we check
8098
  @type nodenames: list
8099
  @param nodenames: the list of nodes on which we should check
8100
  @rtype: list
8101
  @return: the list of vm-capable nodes
8102

8103
  """
8104
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
8106

    
8107

    
8108
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8109
  """Hypervisor parameter validation.
8110

8111
  This function abstracts the hypervisor parameter validation to be
8112
  used in both instance create and instance modify.
8113

8114
  @type lu: L{LogicalUnit}
8115
  @param lu: the logical unit for which we check
8116
  @type nodenames: list
8117
  @param nodenames: the list of nodes on which we should check
8118
  @type hvname: string
8119
  @param hvname: the name of the hypervisor we should use
8120
  @type hvparams: dict
8121
  @param hvparams: the parameters which we need to check
8122
  @raise errors.OpPrereqError: if the parameters are not valid
8123

8124
  """
8125
  nodenames = _FilterVmNodes(lu, nodenames)
8126
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8127
                                                  hvname,
8128
                                                  hvparams)
8129
  for node in nodenames:
8130
    info = hvinfo[node]
8131
    if info.offline:
8132
      continue
8133
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8134

    
8135

    
8136
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8137
  """OS parameters validation.
8138

8139
  @type lu: L{LogicalUnit}
8140
  @param lu: the logical unit for which we check
8141
  @type required: boolean
8142
  @param required: whether the validation should fail if the OS is not
8143
      found
8144
  @type nodenames: list
8145
  @param nodenames: the list of nodes on which we should check
8146
  @type osname: string
8147
  @param osname: the name of the OS we should use
8148
  @type osparams: dict
8149
  @param osparams: the parameters which we need to check
8150
  @raise errors.OpPrereqError: if the parameters are not valid
8151

8152
  """
8153
  nodenames = _FilterVmNodes(lu, nodenames)
8154
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8155
                                   [constants.OS_VALIDATE_PARAMETERS],
8156
                                   osparams)
8157
  for node, nres in result.items():
8158
    # we don't check for offline cases since this should be run only
8159
    # against the master node and/or an instance's nodes
8160
    nres.Raise("OS Parameters validation failed on node %s" % node)
8161
    if not nres.payload:
8162
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8163
                 osname, node)
8164

    
8165

    
8166
class LUInstanceCreate(LogicalUnit):
8167
  """Create an instance.
8168

8169
  """
8170
  HPATH = "instance-add"
8171
  HTYPE = constants.HTYPE_INSTANCE
8172
  REQ_BGL = False
8173

    
8174
  def CheckArguments(self):
8175
    """Check arguments.
8176

8177
    """
8178
    # do not require name_check to ease forward/backward compatibility
8179
    # for tools
8180
    if self.op.no_install and self.op.start:
8181
      self.LogInfo("No-installation mode selected, disabling startup")
8182
      self.op.start = False
8183
    # validate/normalize the instance name
8184
    self.op.instance_name = \
8185
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8186

    
8187
    if self.op.ip_check and not self.op.name_check:
8188
      # TODO: make the ip check more flexible and not depend on the name check
8189
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8190
                                 " check", errors.ECODE_INVAL)
8191

    
8192
    # check nics' parameter names
8193
    for nic in self.op.nics:
8194
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8195

    
8196
    # check disks. parameter names and consistent adopt/no-adopt strategy
8197
    has_adopt = has_no_adopt = False
8198
    for disk in self.op.disks:
8199
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8200
      if constants.IDISK_ADOPT in disk:
8201
        has_adopt = True
8202
      else:
8203
        has_no_adopt = True
8204
    if has_adopt and has_no_adopt:
8205
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8206
                                 errors.ECODE_INVAL)
8207
    if has_adopt:
8208
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8209
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8210
                                   " '%s' disk template" %
8211
                                   self.op.disk_template,
8212
                                   errors.ECODE_INVAL)
8213
      if self.op.iallocator is not None:
8214
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8215
                                   " iallocator script", errors.ECODE_INVAL)
8216
      if self.op.mode == constants.INSTANCE_IMPORT:
8217
        raise errors.OpPrereqError("Disk adoption not allowed for"
8218
                                   " instance import", errors.ECODE_INVAL)
8219
    else:
8220
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8221
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8222
                                   " but no 'adopt' parameter given" %
8223
                                   self.op.disk_template,
8224
                                   errors.ECODE_INVAL)
8225

    
8226
    self.adopt_disks = has_adopt
8227

    
8228
    # instance name verification
8229
    if self.op.name_check:
8230
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8231
      self.op.instance_name = self.hostname1.name
8232
      # used in CheckPrereq for ip ping check
8233
      self.check_ip = self.hostname1.ip
8234
    else:
8235
      self.check_ip = None
8236

    
8237
    # file storage checks
8238
    if (self.op.file_driver and
8239
        not self.op.file_driver in constants.FILE_DRIVER):
8240
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8241
                                 self.op.file_driver, errors.ECODE_INVAL)
8242

    
8243
    if self.op.disk_template == constants.DT_FILE:
8244
      opcodes.RequireFileStorage()
8245
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8246
      opcodes.RequireSharedFileStorage()
8247

    
8248
    ### Node/iallocator related checks
8249
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8250

    
8251
    if self.op.pnode is not None:
8252
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8253
        if self.op.snode is None:
8254
          raise errors.OpPrereqError("The networked disk templates need"
8255
                                     " a mirror node", errors.ECODE_INVAL)
8256
      elif self.op.snode:
8257
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8258
                        " template")
8259
        self.op.snode = None
8260

    
8261
    self._cds = _GetClusterDomainSecret()
8262

    
8263
    if self.op.mode == constants.INSTANCE_IMPORT:
8264
      # On import force_variant must be True, because if we forced it at
8265
      # initial install, our only chance when importing it back is that it
8266
      # works again!
8267
      self.op.force_variant = True
8268

    
8269
      if self.op.no_install:
8270
        self.LogInfo("No-installation mode has no effect during import")
8271

    
8272
    elif self.op.mode == constants.INSTANCE_CREATE:
8273
      if self.op.os_type is None:
8274
        raise errors.OpPrereqError("No guest OS specified",
8275
                                   errors.ECODE_INVAL)
8276
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8277
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8278
                                   " installation" % self.op.os_type,
8279
                                   errors.ECODE_STATE)
8280
      if self.op.disk_template is None:
8281
        raise errors.OpPrereqError("No disk template specified",
8282
                                   errors.ECODE_INVAL)
8283

    
8284
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8285
      # Check handshake to ensure both clusters have the same domain secret
8286
      src_handshake = self.op.source_handshake
8287
      if not src_handshake:
8288
        raise errors.OpPrereqError("Missing source handshake",
8289
                                   errors.ECODE_INVAL)
8290

    
8291
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8292
                                                           src_handshake)
8293
      if errmsg:
8294
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8295
                                   errors.ECODE_INVAL)
8296

    
8297
      # Load and check source CA
8298
      self.source_x509_ca_pem = self.op.source_x509_ca
8299
      if not self.source_x509_ca_pem:
8300
        raise errors.OpPrereqError("Missing source X509 CA",
8301
                                   errors.ECODE_INVAL)
8302

    
8303
      try:
8304
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8305
                                                    self._cds)
8306
      except OpenSSL.crypto.Error, err:
8307
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8308
                                   (err, ), errors.ECODE_INVAL)
8309

    
8310
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8311
      if errcode is not None:
8312
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8313
                                   errors.ECODE_INVAL)
8314

    
8315
      self.source_x509_ca = cert
8316

    
8317
      src_instance_name = self.op.source_instance_name
8318
      if not src_instance_name:
8319
        raise errors.OpPrereqError("Missing source instance name",
8320
                                   errors.ECODE_INVAL)
8321

    
8322
      self.source_instance_name = \
8323
          netutils.GetHostname(name=src_instance_name).name
8324

    
8325
    else:
8326
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8327
                                 self.op.mode, errors.ECODE_INVAL)
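    # Informal summary of the creation modes validated above:
    #   INSTANCE_CREATE        - requires os_type and disk_template
    #   INSTANCE_IMPORT        - forces force_variant; no_install is ignored
    #   INSTANCE_REMOTE_IMPORT - requires source_handshake, source_x509_ca and
    #                            source_instance_name, all checked against the
    #                            cluster domain secret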
8328

    
8329
  def ExpandNames(self):
8330
    """ExpandNames for CreateInstance.
8331

8332
    Figure out the right locks for instance creation.
8333

8334
    """
8335
    self.needed_locks = {}
8336

    
8337
    instance_name = self.op.instance_name
8338
    # this is just a preventive check, but someone might still add this
8339
    # instance in the meantime, and creation will fail at lock-add time
8340
    if instance_name in self.cfg.GetInstanceList():
8341
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8342
                                 instance_name, errors.ECODE_EXISTS)
8343

    
8344
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8345

    
8346
    if self.op.iallocator:
8347
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8348
    else:
8349
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8350
      nodelist = [self.op.pnode]
8351
      if self.op.snode is not None:
8352
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8353
        nodelist.append(self.op.snode)
8354
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8355

    
8356
    # in case of import lock the source node too
8357
    if self.op.mode == constants.INSTANCE_IMPORT:
8358
      src_node = self.op.src_node
8359
      src_path = self.op.src_path
8360

    
8361
      if src_path is None:
8362
        self.op.src_path = src_path = self.op.instance_name
8363

    
8364
      if src_node is None:
8365
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8366
        self.op.src_node = None
8367
        if os.path.isabs(src_path):
8368
          raise errors.OpPrereqError("Importing an instance from a path"
8369
                                     " requires a source node option",
8370
                                     errors.ECODE_INVAL)
8371
      else:
8372
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8373
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8374
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8375
        if not os.path.isabs(src_path):
8376
          self.op.src_path = src_path = \
8377
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8378

    
8379
  def _RunAllocator(self):
8380
    """Run the allocator based on input opcode.
8381

8382
    """
8383
    nics = [n.ToDict() for n in self.nics]
8384
    ial = IAllocator(self.cfg, self.rpc,
8385
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8386
                     name=self.op.instance_name,
8387
                     disk_template=self.op.disk_template,
8388
                     tags=self.op.tags,
8389
                     os=self.op.os_type,
8390
                     vcpus=self.be_full[constants.BE_VCPUS],
8391
                     memory=self.be_full[constants.BE_MEMORY],
8392
                     disks=self.disks,
8393
                     nics=nics,
8394
                     hypervisor=self.op.hypervisor,
8395
                     )
8396

    
8397
    ial.Run(self.op.iallocator)
8398

    
8399
    if not ial.success:
8400
      raise errors.OpPrereqError("Can't compute nodes using"
8401
                                 " iallocator '%s': %s" %
8402
                                 (self.op.iallocator, ial.info),
8403
                                 errors.ECODE_NORES)
8404
    if len(ial.result) != ial.required_nodes:
8405
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8406
                                 " of nodes (%s), required %s" %
8407
                                 (self.op.iallocator, len(ial.result),
8408
                                  ial.required_nodes), errors.ECODE_FAULT)
8409
    self.op.pnode = ial.result[0]
8410
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8411
                 self.op.instance_name, self.op.iallocator,
8412
                 utils.CommaJoin(ial.result))
8413
    if ial.required_nodes == 2:
8414
      self.op.snode = ial.result[1]
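    # Rough shape of a successful allocation (names are made up): ial.result
    # is a list of node names of length ial.required_nodes, e.g.
    # ["node1.example.com", "node2.example.com"] for a DRBD template; the
    # first entry becomes the primary and the optional second the secondary.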
8415

    
8416
  def BuildHooksEnv(self):
8417
    """Build hooks env.
8418

8419
    This runs on master, primary and secondary nodes of the instance.
8420

8421
    """
8422
    env = {
8423
      "ADD_MODE": self.op.mode,
8424
      }
8425
    if self.op.mode == constants.INSTANCE_IMPORT:
8426
      env["SRC_NODE"] = self.op.src_node
8427
      env["SRC_PATH"] = self.op.src_path
8428
      env["SRC_IMAGES"] = self.src_images
8429

    
8430
    env.update(_BuildInstanceHookEnv(
8431
      name=self.op.instance_name,
8432
      primary_node=self.op.pnode,
8433
      secondary_nodes=self.secondaries,
8434
      status=self.op.start,
8435
      os_type=self.op.os_type,
8436
      memory=self.be_full[constants.BE_MEMORY],
8437
      vcpus=self.be_full[constants.BE_VCPUS],
8438
      nics=_NICListToTuple(self, self.nics),
8439
      disk_template=self.op.disk_template,
8440
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8441
             for d in self.disks],
8442
      bep=self.be_full,
8443
      hvp=self.hv_full,
8444
      hypervisor_name=self.op.hypervisor,
8445
      tags=self.op.tags,
8446
    ))
8447

    
8448
    return env
8449

    
8450
  def BuildHooksNodes(self):
8451
    """Build hooks nodes.
8452

8453
    """
8454
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8455
    return nl, nl
8456

    
8457
  def _ReadExportInfo(self):
8458
    """Reads the export information from disk.
8459

8460
    It will override the opcode source node and path with the actual
8461
    information, if these two were not specified before.
8462

8463
    @return: the export information
8464

8465
    """
8466
    assert self.op.mode == constants.INSTANCE_IMPORT
8467

    
8468
    src_node = self.op.src_node
8469
    src_path = self.op.src_path
8470

    
8471
    if src_node is None:
8472
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8473
      exp_list = self.rpc.call_export_list(locked_nodes)
8474
      found = False
8475
      for node in exp_list:
8476
        if exp_list[node].fail_msg:
8477
          continue
8478
        if src_path in exp_list[node].payload:
8479
          found = True
8480
          self.op.src_node = src_node = node
8481
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8482
                                                       src_path)
8483
          break
8484
      if not found:
8485
        raise errors.OpPrereqError("No export found for relative path %s" %
8486
                                    src_path, errors.ECODE_INVAL)
8487

    
8488
    _CheckNodeOnline(self, src_node)
8489
    result = self.rpc.call_export_info(src_node, src_path)
8490
    result.Raise("No export or invalid export found in dir %s" % src_path)
8491

    
8492
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8493
    if not export_info.has_section(constants.INISECT_EXP):
8494
      raise errors.ProgrammerError("Corrupted export config",
8495
                                   errors.ECODE_ENVIRON)
8496

    
8497
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8498
    if (int(ei_version) != constants.EXPORT_VERSION):
8499
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8500
                                 (ei_version, constants.EXPORT_VERSION),
8501
                                 errors.ECODE_ENVIRON)
8502
    return export_info
8503

    
8504
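  # Illustrative sketch of the export config parsed above and read by
  # _ReadExportParams below. Section and option names follow the constants
  # referenced in the code ("export" for INISECT_EXP and "instance" for
  # INISECT_INS are assumptions for this sketch); all values are made up:
  #
  #   [export]
  #   version = 0
  #
  #   [instance]
  #   name = old-instance.example.com
  #   os = debootstrap
  #   disk_template = drbd
  #   disk0_size = 10240
  #   disk0_dump = 10240.disk0_data.snap
  #   nic0_mac = aa:00:00:12:34:56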
  def _ReadExportParams(self, einfo):
8505
    """Use export parameters as defaults.
8506

8507
    If the opcode does not specify (i.e. override) some instance
    parameters, this tries to take them from the export information,
    provided the export declares them.
8510

8511
    """
8512
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8513

    
8514
    if self.op.disk_template is None:
8515
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8516
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8517
                                          "disk_template")
8518
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8519
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES),
                                     errors.ECODE_INVAL)
8522
      else:
8523
        raise errors.OpPrereqError("No disk template specified and the export"
8524
                                   " is missing the disk_template information",
8525
                                   errors.ECODE_INVAL)
8526

    
8527
    if not self.op.disks:
8528
      disks = []
8529
      # TODO: import the disk iv_name too
8530
      for idx in range(constants.MAX_DISKS):
8531
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8532
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8533
          disks.append({constants.IDISK_SIZE: disk_sz})
8534
      self.op.disks = disks
8535
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8536
        raise errors.OpPrereqError("No disk info specified and the export"
8537
                                   " is missing the disk information",
8538
                                   errors.ECODE_INVAL)
8539

    
8540
    if not self.op.nics:
8541
      nics = []
8542
      for idx in range(constants.MAX_NICS):
8543
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8544
          ndict = {}
8545
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8546
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8547
            ndict[name] = v
8548
          nics.append(ndict)
8549
        else:
8550
          break
8551
      self.op.nics = nics
8552

    
8553
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8554
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8555

    
8556
    if (self.op.hypervisor is None and
8557
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8558
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8559

    
8560
    if einfo.has_section(constants.INISECT_HYP):
8561
      # use the export parameters but do not override the ones
8562
      # specified by the user
8563
      for name, value in einfo.items(constants.INISECT_HYP):
8564
        if name not in self.op.hvparams:
8565
          self.op.hvparams[name] = value
8566

    
8567
    if einfo.has_section(constants.INISECT_BEP):
8568
      # use the parameters, without overriding
8569
      for name, value in einfo.items(constants.INISECT_BEP):
8570
        if name not in self.op.beparams:
8571
          self.op.beparams[name] = value
8572
    else:
8573
      # try to read the parameters old style, from the main section
8574
      for name in constants.BES_PARAMETERS:
8575
        if (name not in self.op.beparams and
8576
            einfo.has_option(constants.INISECT_INS, name)):
8577
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8578

    
8579
    if einfo.has_section(constants.INISECT_OSP):
8580
      # use the parameters, without overriding
8581
      for name, value in einfo.items(constants.INISECT_OSP):
8582
        if name not in self.op.osparams:
8583
          self.op.osparams[name] = value
8584

    
8585
  def _RevertToDefaults(self, cluster):
8586
    """Revert the instance parameters to the default values.
8587

8588
    """
8589
    # hvparams
8590
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8591
    for name in self.op.hvparams.keys():
8592
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8593
        del self.op.hvparams[name]
8594
    # beparams
8595
    be_defs = cluster.SimpleFillBE({})
8596
    for name in self.op.beparams.keys():
8597
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8598
        del self.op.beparams[name]
8599
    # nic params
8600
    nic_defs = cluster.SimpleFillNIC({})
8601
    for nic in self.op.nics:
8602
      for name in constants.NICS_PARAMETERS:
8603
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8604
          del nic[name]
8605
    # osparams
8606
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8607
    for name in self.op.osparams.keys():
8608
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8609
        del self.op.osparams[name]
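    # Minimal standalone sketch of the "identify defaults" logic above, using
    # plain dicts instead of cluster objects (values are made up):
    #
    #   defaults = {"memory": 128, "vcpus": 1}
    #   explicit = {"memory": 128, "vcpus": 2}
    #   for key in explicit.keys():
    #     if key in defaults and defaults[key] == explicit[key]:
    #       del explicit[key]
    #   # explicit == {"vcpus": 2}: values equal to the defaults are dropped,
    #   # so the instance keeps following the cluster defaults for them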
8610

    
8611
  def _CalculateFileStorageDir(self):
8612
    """Calculate final instance file storage dir.
8613

8614
    """
8615
    # file storage dir calculation/check
8616
    self.instance_file_storage_dir = None
8617
    if self.op.disk_template in constants.DTS_FILEBASED:
8618
      # build the full file storage dir path
8619
      joinargs = []
8620

    
8621
      if self.op.disk_template == constants.DT_SHARED_FILE:
8622
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8623
      else:
8624
        get_fsd_fn = self.cfg.GetFileStorageDir
8625

    
8626
      cfg_storagedir = get_fsd_fn()
8627
      if not cfg_storagedir:
8628
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8629
      joinargs.append(cfg_storagedir)
8630

    
8631
      if self.op.file_storage_dir is not None:
8632
        joinargs.append(self.op.file_storage_dir)
8633

    
8634
      joinargs.append(self.op.instance_name)
8635

    
8636
      # pylint: disable=W0142
8637
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
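      # Example of the resulting path (all values made up for illustration):
      # a cluster file storage dir of "/srv/ganeti/file-storage", an opcode
      # file_storage_dir of "webfarm" and instance "web1.example.com" would
      # yield "/srv/ganeti/file-storage/webfarm/web1.example.com".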
8638

    
8639
  def CheckPrereq(self):
8640
    """Check prerequisites.
8641

8642
    """
8643
    self._CalculateFileStorageDir()
8644

    
8645
    if self.op.mode == constants.INSTANCE_IMPORT:
8646
      export_info = self._ReadExportInfo()
8647
      self._ReadExportParams(export_info)
8648

    
8649
    if (not self.cfg.GetVGName() and
8650
        self.op.disk_template not in constants.DTS_NOT_LVM):
8651
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8652
                                 " instances", errors.ECODE_STATE)
8653

    
8654
    if (self.op.hypervisor is None or
8655
        self.op.hypervisor == constants.VALUE_AUTO):
8656
      self.op.hypervisor = self.cfg.GetHypervisorType()
8657

    
8658
    cluster = self.cfg.GetClusterInfo()
8659
    enabled_hvs = cluster.enabled_hypervisors
8660
    if self.op.hypervisor not in enabled_hvs:
8661
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8662
                                 " cluster (%s)" % (self.op.hypervisor,
8663
                                  ",".join(enabled_hvs)),
8664
                                 errors.ECODE_STATE)
8665

    
8666
    # Check tag validity
8667
    for tag in self.op.tags:
8668
      objects.TaggableObject.ValidateTag(tag)
8669

    
8670
    # check hypervisor parameter syntax (locally)
8671
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8672
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8673
                                      self.op.hvparams)
8674
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8675
    hv_type.CheckParameterSyntax(filled_hvp)
8676
    self.hv_full = filled_hvp
8677
    # check that we don't specify global parameters on an instance
8678
    _CheckGlobalHvParams(self.op.hvparams)
8679

    
8680
    # fill and remember the beparams dict
8681
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8682
    for param, value in self.op.beparams.iteritems():
8683
      if value == constants.VALUE_AUTO:
8684
        self.op.beparams[param] = default_beparams[param]
8685
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8686
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8687

    
8688
    # build os parameters
8689
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8690

    
8691
    # now that hvp/bep are in final format, let's reset to defaults,
8692
    # if told to do so
8693
    if self.op.identify_defaults:
8694
      self._RevertToDefaults(cluster)
8695

    
8696
    # NIC buildup
8697
    self.nics = []
8698
    for idx, nic in enumerate(self.op.nics):
8699
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8700
      nic_mode = nic_mode_req
8701
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8702
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8703

    
8704
      # in routed mode, for the first nic, the default ip is 'auto'
8705
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8706
        default_ip_mode = constants.VALUE_AUTO
8707
      else:
8708
        default_ip_mode = constants.VALUE_NONE
8709

    
8710
      # ip validity checks
8711
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8712
      if ip is None or ip.lower() == constants.VALUE_NONE:
8713
        nic_ip = None
8714
      elif ip.lower() == constants.VALUE_AUTO:
8715
        if not self.op.name_check:
8716
          raise errors.OpPrereqError("IP address set to auto but name checks"
8717
                                     " have been skipped",
8718
                                     errors.ECODE_INVAL)
8719
        nic_ip = self.hostname1.ip
8720
      else:
8721
        if not netutils.IPAddress.IsValid(ip):
8722
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8723
                                     errors.ECODE_INVAL)
8724
        nic_ip = ip
8725

    
8726
      # TODO: check the ip address for uniqueness
8727
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8728
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8729
                                   errors.ECODE_INVAL)
8730

    
8731
      # MAC address verification
8732
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8733
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8734
        mac = utils.NormalizeAndValidateMac(mac)
8735

    
8736
        try:
8737
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8738
        except errors.ReservationError:
8739
          raise errors.OpPrereqError("MAC address %s already in use"
8740
                                     " in cluster" % mac,
8741
                                     errors.ECODE_NOTUNIQUE)
8742

    
8743
      #  Build nic parameters
8744
      link = nic.get(constants.INIC_LINK, None)
8745
      if link == constants.VALUE_AUTO:
8746
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8747
      nicparams = {}
8748
      if nic_mode_req:
8749
        nicparams[constants.NIC_MODE] = nic_mode
8750
      if link:
8751
        nicparams[constants.NIC_LINK] = link
8752

    
8753
      check_params = cluster.SimpleFillNIC(nicparams)
8754
      objects.NIC.CheckParameterSyntax(check_params)
8755
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8756

    
8757
    # disk checks/pre-build
8758
    default_vg = self.cfg.GetVGName()
8759
    self.disks = []
8760
    for disk in self.op.disks:
8761
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8762
      if mode not in constants.DISK_ACCESS_SET:
8763
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8764
                                   mode, errors.ECODE_INVAL)
8765
      size = disk.get(constants.IDISK_SIZE, None)
8766
      if size is None:
8767
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8768
      try:
8769
        size = int(size)
8770
      except (TypeError, ValueError):
8771
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8772
                                   errors.ECODE_INVAL)
8773

    
8774
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8775
      new_disk = {
8776
        constants.IDISK_SIZE: size,
8777
        constants.IDISK_MODE: mode,
8778
        constants.IDISK_VG: data_vg,
8779
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8780
        }
8781
      if constants.IDISK_ADOPT in disk:
8782
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8783
      self.disks.append(new_disk)
8784

    
8785
    if self.op.mode == constants.INSTANCE_IMPORT:
8786
      disk_images = []
8787
      for idx in range(len(self.disks)):
8788
        option = "disk%d_dump" % idx
8789
        if export_info.has_option(constants.INISECT_INS, option):
8790
          # FIXME: are the old os-es, disk sizes, etc. useful?
8791
          export_name = export_info.get(constants.INISECT_INS, option)
8792
          image = utils.PathJoin(self.op.src_path, export_name)
8793
          disk_images.append(image)
8794
        else:
8795
          disk_images.append(False)
8796

    
8797
      self.src_images = disk_images
8798

    
8799
      old_name = export_info.get(constants.INISECT_INS, "name")
8800
      if self.op.instance_name == old_name:
8801
        for idx, nic in enumerate(self.nics):
8802
          if nic.mac == constants.VALUE_AUTO:
8803
            nic_mac_ini = "nic%d_mac" % idx
8804
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8805

    
8806
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8807

    
8808
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8809
    if self.op.ip_check:
8810
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8811
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8812
                                   (self.check_ip, self.op.instance_name),
8813
                                   errors.ECODE_NOTUNIQUE)
8814

    
8815
    #### mac address generation
8816
    # By generating here the mac address both the allocator and the hooks get
8817
    # the real final mac address rather than the 'auto' or 'generate' value.
8818
    # There is a race condition between the generation and the instance object
8819
    # creation, which means that we know the mac is valid now, but we're not
8820
    # sure it will be when we actually add the instance. If things go bad
8821
    # adding the instance will abort because of a duplicate mac, and the
8822
    # creation job will fail.
8823
    for nic in self.nics:
8824
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8825
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
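    # Illustrative result of the loop above: a NIC requested with
    # mac=constants.VALUE_AUTO now carries a concrete address such as
    # "aa:00:00:4f:2e:11" (made-up value), reserved against the current
    # execution context id so parallel creations should not pick it up.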
8826

    
8827
    #### allocator run
8828

    
8829
    if self.op.iallocator is not None:
8830
      self._RunAllocator()
8831

    
8832
    #### node related checks
8833

    
8834
    # check primary node
8835
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8836
    assert self.pnode is not None, \
8837
      "Cannot retrieve locked node %s" % self.op.pnode
8838
    if pnode.offline:
8839
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8840
                                 pnode.name, errors.ECODE_STATE)
8841
    if pnode.drained:
8842
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8843
                                 pnode.name, errors.ECODE_STATE)
8844
    if not pnode.vm_capable:
8845
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8846
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8847

    
8848
    self.secondaries = []
8849

    
8850
    # mirror node verification
8851
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8852
      if self.op.snode == pnode.name:
8853
        raise errors.OpPrereqError("The secondary node cannot be the"
8854
                                   " primary node", errors.ECODE_INVAL)
8855
      _CheckNodeOnline(self, self.op.snode)
8856
      _CheckNodeNotDrained(self, self.op.snode)
8857
      _CheckNodeVmCapable(self, self.op.snode)
8858
      self.secondaries.append(self.op.snode)
8859

    
8860
    nodenames = [pnode.name] + self.secondaries
8861

    
8862
    if not self.adopt_disks:
8863
      # Check lv size requirements, if not adopting
8864
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8865
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8866

    
8867
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8868
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8869
                                disk[constants.IDISK_ADOPT])
8870
                     for disk in self.disks])
8871
      if len(all_lvs) != len(self.disks):
8872
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8873
                                   errors.ECODE_INVAL)
8874
      for lv_name in all_lvs:
8875
        try:
8876
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
8878
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8879
        except errors.ReservationError:
8880
          raise errors.OpPrereqError("LV named %s used by another instance" %
8881
                                     lv_name, errors.ECODE_NOTUNIQUE)
8882

    
8883
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8884
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8885

    
8886
      node_lvs = self.rpc.call_lv_list([pnode.name],
8887
                                       vg_names.payload.keys())[pnode.name]
8888
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8889
      node_lvs = node_lvs.payload
8890

    
8891
      delta = all_lvs.difference(node_lvs.keys())
8892
      if delta:
8893
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8894
                                   utils.CommaJoin(delta),
8895
                                   errors.ECODE_INVAL)
8896
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8897
      if online_lvs:
8898
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8899
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8900
                                   errors.ECODE_STATE)
8901
      # update the size of disk based on what is found
8902
      for dsk in self.disks:
8903
        dsk[constants.IDISK_SIZE] = \
8904
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8905
                                        dsk[constants.IDISK_ADOPT])][0]))
8906

    
8907
    elif self.op.disk_template == constants.DT_BLOCK:
8908
      # Normalize and de-duplicate device paths
8909
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8910
                       for disk in self.disks])
8911
      if len(all_disks) != len(self.disks):
8912
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8913
                                   errors.ECODE_INVAL)
8914
      baddisks = [d for d in all_disks
8915
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8916
      if baddisks:
8917
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8918
                                   " cannot be adopted" %
8919
                                   (", ".join(baddisks),
8920
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8921
                                   errors.ECODE_INVAL)
8922

    
8923
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8924
                                            list(all_disks))[pnode.name]
8925
      node_disks.Raise("Cannot get block device information from node %s" %
8926
                       pnode.name)
8927
      node_disks = node_disks.payload
8928
      delta = all_disks.difference(node_disks.keys())
8929
      if delta:
8930
        raise errors.OpPrereqError("Missing block device(s): %s" %
8931
                                   utils.CommaJoin(delta),
8932
                                   errors.ECODE_INVAL)
8933
      for dsk in self.disks:
8934
        dsk[constants.IDISK_SIZE] = \
8935
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8936

    
8937
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8938

    
8939
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8940
    # check OS parameters (remotely)
8941
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8942

    
8943
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8944

    
8945
    # memory check on primary node
8946
    if self.op.start:
8947
      _CheckNodeFreeMemory(self, self.pnode.name,
8948
                           "creating instance %s" % self.op.instance_name,
8949
                           self.be_full[constants.BE_MEMORY],
8950
                           self.op.hypervisor)
8951

    
8952
    self.dry_run_result = list(nodenames)
8953

    
8954
  def Exec(self, feedback_fn):
8955
    """Create and add the instance to the cluster.
8956

8957
    """
8958
    instance = self.op.instance_name
8959
    pnode_name = self.pnode.name
8960

    
8961
    ht_kind = self.op.hypervisor
8962
    if ht_kind in constants.HTS_REQ_PORT:
8963
      network_port = self.cfg.AllocatePort()
8964
    else:
8965
      network_port = None
8966

    
8967
    disks = _GenerateDiskTemplate(self,
8968
                                  self.op.disk_template,
8969
                                  instance, pnode_name,
8970
                                  self.secondaries,
8971
                                  self.disks,
8972
                                  self.instance_file_storage_dir,
8973
                                  self.op.file_driver,
8974
                                  0,
8975
                                  feedback_fn)
8976

    
8977
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8978
                            primary_node=pnode_name,
8979
                            nics=self.nics, disks=disks,
8980
                            disk_template=self.op.disk_template,
8981
                            admin_up=False,
8982
                            network_port=network_port,
8983
                            beparams=self.op.beparams,
8984
                            hvparams=self.op.hvparams,
8985
                            hypervisor=self.op.hypervisor,
8986
                            osparams=self.op.osparams,
8987
                            )
8988

    
8989
    if self.op.tags:
8990
      for tag in self.op.tags:
8991
        iobj.AddTag(tag)
8992

    
8993
    if self.adopt_disks:
8994
      if self.op.disk_template == constants.DT_PLAIN:
8995
        # rename LVs to the newly-generated names; we need to construct
8996
        # 'fake' LV disks with the old data, plus the new unique_id
8997
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8998
        rename_to = []
8999
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9000
          rename_to.append(t_dsk.logical_id)
9001
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9002
          self.cfg.SetDiskID(t_dsk, pnode_name)
9003
        result = self.rpc.call_blockdev_rename(pnode_name,
9004
                                               zip(tmp_disks, rename_to))
9005
        result.Raise("Failed to rename adoped LVs")
9006
    else:
9007
      feedback_fn("* creating instance disks...")
9008
      try:
9009
        _CreateDisks(self, iobj)
9010
      except errors.OpExecError:
9011
        self.LogWarning("Device creation failed, reverting...")
9012
        try:
9013
          _RemoveDisks(self, iobj)
9014
        finally:
9015
          self.cfg.ReleaseDRBDMinors(instance)
9016
          raise
9017

    
9018
    feedback_fn("adding instance %s to cluster config" % instance)
9019

    
9020
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9021

    
9022
    # Declare that we don't want to remove the instance lock anymore, as we've
9023
    # added the instance to the config
9024
    del self.remove_locks[locking.LEVEL_INSTANCE]
9025

    
9026
    if self.op.mode == constants.INSTANCE_IMPORT:
9027
      # Release unused nodes
9028
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9029
    else:
9030
      # Release all nodes
9031
      _ReleaseLocks(self, locking.LEVEL_NODE)
9032

    
9033
    disk_abort = False
9034
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9035
      feedback_fn("* wiping instance disks...")
9036
      try:
9037
        _WipeDisks(self, iobj)
9038
      except errors.OpExecError, err:
9039
        logging.exception("Wiping disks failed")
9040
        self.LogWarning("Wiping instance disks failed (%s)", err)
9041
        disk_abort = True
9042

    
9043
    if disk_abort:
9044
      # Something is already wrong with the disks, don't do anything else
9045
      pass
9046
    elif self.op.wait_for_sync:
9047
      disk_abort = not _WaitForSync(self, iobj)
9048
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9049
      # make sure the disks are not degraded (still sync-ing is ok)
9050
      feedback_fn("* checking mirrors status")
9051
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9052
    else:
9053
      disk_abort = False
9054

    
9055
    if disk_abort:
9056
      _RemoveDisks(self, iobj)
9057
      self.cfg.RemoveInstance(iobj.name)
9058
      # Make sure the instance lock gets removed
9059
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9060
      raise errors.OpExecError("There are some degraded disks for"
9061
                               " this instance")
9062

    
9063
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9064
      if self.op.mode == constants.INSTANCE_CREATE:
9065
        if not self.op.no_install:
9066
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9067
                        not self.op.wait_for_sync)
9068
          if pause_sync:
9069
            feedback_fn("* pausing disk sync to install instance OS")
9070
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9071
                                                              iobj.disks, True)
9072
            for idx, success in enumerate(result.payload):
9073
              if not success:
9074
                logging.warn("pause-sync of instance %s for disk %d failed",
9075
                             instance, idx)
9076

    
9077
          feedback_fn("* running the instance OS create scripts...")
9078
          # FIXME: pass debug option from opcode to backend
9079
          os_add_result = \
9080
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
9081
                                          self.op.debug_level)
9082
          if pause_sync:
9083
            feedback_fn("* resuming disk sync")
9084
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9085
                                                              iobj.disks, False)
9086
            for idx, success in enumerate(result.payload):
9087
              if not success:
9088
                logging.warn("resume-sync of instance %s for disk %d failed",
9089
                             instance, idx)
9090

    
9091
          os_add_result.Raise("Could not add os for instance %s"
9092
                              " on node %s" % (instance, pnode_name))
9093

    
9094
      elif self.op.mode == constants.INSTANCE_IMPORT:
9095
        feedback_fn("* running the instance OS import scripts...")
9096

    
9097
        transfers = []
9098

    
9099
        for idx, image in enumerate(self.src_images):
9100
          if not image:
9101
            continue
9102

    
9103
          # FIXME: pass debug option from opcode to backend
9104
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9105
                                             constants.IEIO_FILE, (image, ),
9106
                                             constants.IEIO_SCRIPT,
9107
                                             (iobj.disks[idx], idx),
9108
                                             None)
9109
          transfers.append(dt)
9110

    
9111
        import_result = \
9112
          masterd.instance.TransferInstanceData(self, feedback_fn,
9113
                                                self.op.src_node, pnode_name,
9114
                                                self.pnode.secondary_ip,
9115
                                                iobj, transfers)
9116
        if not compat.all(import_result):
9117
          self.LogWarning("Some disks for instance %s on node %s were not"
9118
                          " imported successfully" % (instance, pnode_name))
9119

    
9120
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9121
        feedback_fn("* preparing remote import...")
9122
        # The source cluster will stop the instance before attempting to make a
9123
        # connection. In some cases stopping an instance can take a long time,
9124
        # hence the shutdown timeout is added to the connection timeout.
9125
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9126
                           self.op.source_shutdown_timeout)
9127
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9128

    
9129
        assert iobj.primary_node == self.pnode.name
9130
        disk_results = \
9131
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9132
                                        self.source_x509_ca,
9133
                                        self._cds, timeouts)
9134
        if not compat.all(disk_results):
9135
          # TODO: Should the instance still be started, even if some disks
9136
          # failed to import (valid for local imports, too)?
9137
          self.LogWarning("Some disks for instance %s on node %s were not"
9138
                          " imported successfully" % (instance, pnode_name))
9139

    
9140
        # Run rename script on newly imported instance
9141
        assert iobj.name == instance
9142
        feedback_fn("Running rename script for %s" % instance)
9143
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9144
                                                   self.source_instance_name,
9145
                                                   self.op.debug_level)
9146
        if result.fail_msg:
9147
          self.LogWarning("Failed to run rename script for %s on node"
9148
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9149

    
9150
      else:
9151
        # also checked in the prereq part
9152
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9153
                                     % self.op.mode)
9154

    
9155
    if self.op.start:
9156
      iobj.admin_up = True
9157
      self.cfg.Update(iobj, feedback_fn)
9158
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9159
      feedback_fn("* starting instance...")
9160
      result = self.rpc.call_instance_start(pnode_name, iobj,
9161
                                            None, None, False)
9162
      result.Raise("Could not start instance")
9163

    
9164
    return list(iobj.all_nodes)
9165
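# Hedged usage sketch (not part of this module): instance creation is driven
# by submitting an opcodes.OpInstanceCreate opcode whose fields map to the
# self.op.* attributes used above. Parameter values below are made up:
#
#   op = opcodes.OpInstanceCreate(instance_name="web1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 os_type="debootstrap",
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}],
#                                 pnode="node1.example.com",
#                                 snode="node2.example.com",
#                                 start=True, wait_for_sync=True)
#
#   # The opcode is then submitted through the usual job queue clients
#   # (e.g. "gnt-instance add" or RAPI), which ends up in this LU's
#   # ExpandNames/CheckPrereq/Exec sequence.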

    
9166

    
9167
class LUInstanceConsole(NoHooksLU):
9168
  """Connect to an instance's console.
9169

9170
  This is somewhat special in that it returns the command line that
9171
  you need to run on the master node in order to connect to the
9172
  console.
9173

9174
  """
9175
  REQ_BGL = False
9176

    
9177
  def ExpandNames(self):
9178
    self._ExpandAndLockInstance()
9179

    
9180
  def CheckPrereq(self):
9181
    """Check prerequisites.
9182

9183
    This checks that the instance is in the cluster.
9184

9185
    """
9186
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9187
    assert self.instance is not None, \
9188
      "Cannot retrieve locked instance %s" % self.op.instance_name
9189
    _CheckNodeOnline(self, self.instance.primary_node)
9190

    
9191
  def Exec(self, feedback_fn):
9192
    """Connect to the console of an instance
9193

9194
    """
9195
    instance = self.instance
9196
    node = instance.primary_node
9197

    
9198
    node_insts = self.rpc.call_instance_list([node],
9199
                                             [instance.hypervisor])[node]
9200
    node_insts.Raise("Can't get node information from %s" % node)
9201

    
9202
    if instance.name not in node_insts.payload:
9203
      if instance.admin_up:
9204
        state = constants.INSTST_ERRORDOWN
9205
      else:
9206
        state = constants.INSTST_ADMINDOWN
9207
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9208
                               (instance.name, state))
9209

    
9210
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9211

    
9212
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9213

    
9214

    
9215
def _GetInstanceConsole(cluster, instance):
9216
  """Returns console information for an instance.
9217

9218
  @type cluster: L{objects.Cluster}
9219
  @type instance: L{objects.Instance}
9220
  @rtype: dict
9221

9222
  """
9223
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9224
  # beparams and hvparams are passed separately, to avoid editing the
9225
  # instance and then saving the defaults in the instance itself.
9226
  hvparams = cluster.FillHV(instance)
9227
  beparams = cluster.FillBE(instance)
9228
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9229

    
9230
  assert console.instance == instance.name
9231
  assert console.Validate()
9232

    
9233
  return console.ToDict()
9234

    
9235

    
9236
class LUInstanceReplaceDisks(LogicalUnit):
9237
  """Replace the disks of an instance.
9238

9239
  """
9240
  HPATH = "mirrors-replace"
9241
  HTYPE = constants.HTYPE_INSTANCE
9242
  REQ_BGL = False
9243

    
9244
  def CheckArguments(self):
9245
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9246
                                  self.op.iallocator)
9247

    
9248
  def ExpandNames(self):
9249
    self._ExpandAndLockInstance()
9250

    
9251
    assert locking.LEVEL_NODE not in self.needed_locks
9252
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9253

    
9254
    assert self.op.iallocator is None or self.op.remote_node is None, \
9255
      "Conflicting options"
9256

    
9257
    if self.op.remote_node is not None:
9258
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9259

    
9260
      # Warning: do not remove the locking of the new secondary here
9261
      # unless DRBD8.AddChildren is changed to work in parallel;
9262
      # currently it doesn't since parallel invocations of
9263
      # FindUnusedMinor will conflict
9264
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9265
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9266
    else:
9267
      self.needed_locks[locking.LEVEL_NODE] = []
9268
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9269

    
9270
      if self.op.iallocator is not None:
9271
        # iallocator will select a new node in the same group
9272
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9273

    
9274
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9275
                                   self.op.iallocator, self.op.remote_node,
9276
                                   self.op.disks, False, self.op.early_release)
9277

    
9278
    self.tasklets = [self.replacer]
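    # Note: by populating self.tasklets the LU delegates the actual
    # CheckPrereq/Exec work to the TLReplaceDisks tasklet defined below; the
    # LU itself mostly deals with locking and hooks.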
9279

    
9280
  def DeclareLocks(self, level):
9281
    if level == locking.LEVEL_NODEGROUP:
9282
      assert self.op.remote_node is None
9283
      assert self.op.iallocator is not None
9284
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9285

    
9286
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9287
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9288
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9289

    
9290
    elif level == locking.LEVEL_NODE:
9291
      if self.op.iallocator is not None:
9292
        assert self.op.remote_node is None
9293
        assert not self.needed_locks[locking.LEVEL_NODE]
9294

    
9295
        # Lock member nodes of all locked groups
9296
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9297
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9298
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9299
      else:
9300
        self._LockInstancesNodes()
9301

    
9302
  def BuildHooksEnv(self):
9303
    """Build hooks env.
9304

9305
    This runs on the master, the primary and all the secondaries.
9306

9307
    """
9308
    instance = self.replacer.instance
9309
    env = {
9310
      "MODE": self.op.mode,
9311
      "NEW_SECONDARY": self.op.remote_node,
9312
      "OLD_SECONDARY": instance.secondary_nodes[0],
9313
      }
9314
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9315
    return env
9316

    
9317
  def BuildHooksNodes(self):
9318
    """Build hooks nodes.
9319

9320
    """
9321
    instance = self.replacer.instance
9322
    nl = [
9323
      self.cfg.GetMasterNode(),
9324
      instance.primary_node,
9325
      ]
9326
    if self.op.remote_node is not None:
9327
      nl.append(self.op.remote_node)
9328
    return nl, nl
9329

    
9330
  def CheckPrereq(self):
9331
    """Check prerequisites.
9332

9333
    """
9334
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9335
            self.op.iallocator is None)
9336

    
9337
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9338
    if owned_groups:
9339
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9340

    
9341
    return LogicalUnit.CheckPrereq(self)
9342

    
9343

    
9344
class TLReplaceDisks(Tasklet):
9345
  """Replaces disks for an instance.
9346

9347
  Note: Locking is not within the scope of this class.
9348

9349
  """
9350
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9351
               disks, delay_iallocator, early_release):
9352
    """Initializes this class.
9353

9354
    """
9355
    Tasklet.__init__(self, lu)
9356

    
9357
    # Parameters
9358
    self.instance_name = instance_name
9359
    self.mode = mode
9360
    self.iallocator_name = iallocator_name
9361
    self.remote_node = remote_node
9362
    self.disks = disks
9363
    self.delay_iallocator = delay_iallocator
9364
    self.early_release = early_release
9365

    
9366
    # Runtime data
9367
    self.instance = None
9368
    self.new_node = None
9369
    self.target_node = None
9370
    self.other_node = None
9371
    self.remote_node_info = None
9372
    self.node_secondary_ip = None
9373

    
9374
  @staticmethod
9375
  def CheckArguments(mode, remote_node, iallocator):
9376
    """Helper function for users of this class.
9377

9378
    """
9379
    # check for valid parameter combination
9380
    if mode == constants.REPLACE_DISK_CHG:
9381
      if remote_node is None and iallocator is None:
9382
        raise errors.OpPrereqError("When changing the secondary either an"
9383
                                   " iallocator script must be used or the"
9384
                                   " new node given", errors.ECODE_INVAL)
9385

    
9386
      if remote_node is not None and iallocator is not None:
9387
        raise errors.OpPrereqError("Give either the iallocator or the new"
9388
                                   " secondary, not both", errors.ECODE_INVAL)
9389

    
9390
    elif remote_node is not None or iallocator is not None:
9391
      # Not replacing the secondary
9392
      raise errors.OpPrereqError("The iallocator and new node options can"
9393
                                 " only be used when changing the"
9394
                                 " secondary node", errors.ECODE_INVAL)
9395

    
9396
  @staticmethod
9397
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9398
    """Compute a new secondary node using an IAllocator.
9399

9400
    """
9401
    ial = IAllocator(lu.cfg, lu.rpc,
9402
                     mode=constants.IALLOCATOR_MODE_RELOC,
9403
                     name=instance_name,
9404
                     relocate_from=list(relocate_from))
9405

    
9406
    ial.Run(iallocator_name)
9407

    
9408
    if not ial.success:
9409
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9410
                                 " %s" % (iallocator_name, ial.info),
9411
                                 errors.ECODE_NORES)
9412

    
9413
    if len(ial.result) != ial.required_nodes:
9414
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9415
                                 " of nodes (%s), required %s" %
9416
                                 (iallocator_name,
9417
                                  len(ial.result), ial.required_nodes),
9418
                                 errors.ECODE_FAULT)
9419

    
9420
    remote_node_name = ial.result[0]
9421

    
9422
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9423
               instance_name, remote_node_name)
9424

    
9425
    return remote_node_name
9426
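  # For IALLOCATOR_MODE_RELOC the allocator is expected to return exactly
  # ial.required_nodes names; for replacing a DRBD secondary that is a single
  # entry, e.g. ["node3.example.com"] (hypothetical value), which becomes the
  # new secondary node.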

    
9427
  def _FindFaultyDisks(self, node_name):
9428
    """Wrapper for L{_FindFaultyInstanceDisks}.
9429

9430
    """
9431
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9432
                                    node_name, True)
9433

    
9434
  def _CheckDisksActivated(self, instance):
9435
    """Checks if the instance disks are activated.
9436

9437
    @param instance: The instance to check disks
9438
    @return: True if they are activated, False otherwise
9439

9440
    """
9441
    nodes = instance.all_nodes
9442

    
9443
    for idx, dev in enumerate(instance.disks):
9444
      for node in nodes:
9445
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9446
        self.cfg.SetDiskID(dev, node)
9447

    
9448
        result = self.rpc.call_blockdev_find(node, dev)
9449

    
9450
        if result.offline:
9451
          continue
9452
        elif result.fail_msg or not result.payload:
9453
          return False
9454

    
9455
    return True
9456

    
9457
  def CheckPrereq(self):
9458
    """Check prerequisites.
9459

9460
    This checks that the instance is in the cluster.
9461

9462
    """
9463
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9464
    assert instance is not None, \
9465
      "Cannot retrieve locked instance %s" % self.instance_name
9466

    
9467
    if instance.disk_template != constants.DT_DRBD8:
9468
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9469
                                 " instances", errors.ECODE_INVAL)
9470

    
9471
    if len(instance.secondary_nodes) != 1:
9472
      raise errors.OpPrereqError("The instance has a strange layout,"
9473
                                 " expected one secondary but found %d" %
9474
                                 len(instance.secondary_nodes),
9475
                                 errors.ECODE_FAULT)
9476

    
9477
    if not self.delay_iallocator:
9478
      self._CheckPrereq2()
9479

    
9480
  def _CheckPrereq2(self):
9481
    """Check prerequisites, second part.
9482

9483
    This function should always be part of CheckPrereq. It was separated and is
9484
    now called from Exec because during node evacuation iallocator was only
9485
    called with an unmodified cluster model, not taking planned changes into
9486
    account.
9487

9488
    """
9489
    instance = self.instance
9490
    secondary_node = instance.secondary_nodes[0]
9491

    
9492
    if self.iallocator_name is None:
9493
      remote_node = self.remote_node
9494
    else:
9495
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9496
                                       instance.name, instance.secondary_nodes)
9497

    
9498
    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
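    # (this mapping is used later by the drbd_disconnect_net/drbd_attach_net
    # RPCs, which need the secondary IPs of the nodes involved)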

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))
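      # (the data LV matches the instance disk size; the small 128 MiB LV
      # will hold the DRBD8 metadata)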

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2
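      # p_minor is the minor the primary node already uses for this disk;
      # only the new secondary side gets one of the freshly allocated minors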

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes
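    # (i.e. the node being evacuated plus every candidate destination node)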

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]
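      # (one single-opcode job per instance: replace its secondary disks,
      # using the given remote node as the new secondary)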

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
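  # Example (illustrative): _SetOpEarlyRelease(True, some_replace_disks_op)
  # returns the same opcode with early_release set to True; opcodes without
  # such an attribute are returned unchanged.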


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
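  # Example (illustrative): with use_nodes=True this yields the comma-joined
  # node list (e.g. "node1, node2"), otherwise just the group name.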


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result
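  # moved is a list of (name, group, nodes) tuples, failed a list of
  # (name, reason) tuples, and jobs a list of per-job opcode lists in
  # serialized form (hence the LoadOpCode calls below)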

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))
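      # (ComputeGrowth is expected to return the additional space needed per
      # volume group, which is the format _CheckNodesFreeDiskPerVG takes)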

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]
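      # (for DRBD the first two logical_id entries are the two node names,
      # so whichever is not the primary must be the secondary)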

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]
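        # (a requested NIC removal is reflected in the hook data by dropping
        # the last entry, mirroring how the removal itself is applied)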
10824

    
10825
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10826
    if self.op.disk_template:
10827
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10828

    
10829
    return env
10830

    
10831
  def BuildHooksNodes(self):
10832
    """Build hooks nodes.
10833

10834
    """
10835
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10836
    return (nl, nl)
10837

    
10838
  def CheckPrereq(self):
10839
    """Check prerequisites.
10840

10841
    This only checks the instance list against the existing names.
10842

10843
    """
10844
    # checking the new params on the primary/secondary nodes
10845

    
10846
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10847
    cluster = self.cluster = self.cfg.GetClusterInfo()
10848
    assert self.instance is not None, \
10849
      "Cannot retrieve locked instance %s" % self.op.instance_name
10850
    pnode = instance.primary_node
10851
    nodelist = list(instance.all_nodes)
10852

    
10853
    # OS change
10854
    if self.op.os_name and not self.op.force:
10855
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10856
                      self.op.force_variant)
10857
      instance_os = self.op.os_name
10858
    else:
10859
      instance_os = instance.os
10860

    
10861
    if self.op.disk_template:
10862
      if instance.disk_template == self.op.disk_template:
10863
        raise errors.OpPrereqError("Instance already has disk template %s" %
10864
                                   instance.disk_template, errors.ECODE_INVAL)
10865

    
10866
      if (instance.disk_template,
10867
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10868
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10869
                                   " %s to %s" % (instance.disk_template,
10870
                                                  self.op.disk_template),
10871
                                   errors.ECODE_INVAL)
10872
      _CheckInstanceDown(self, instance, "cannot change disk template")
10873
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10874
        if self.op.remote_node == pnode:
10875
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10876
                                     " as the primary node of the instance" %
10877
                                     self.op.remote_node, errors.ECODE_STATE)
10878
        _CheckNodeOnline(self, self.op.remote_node)
10879
        _CheckNodeNotDrained(self, self.op.remote_node)
10880
        # FIXME: here we assume that the old disk template is DT_PLAIN
10881
        assert instance.disk_template == constants.DT_PLAIN
10882
        disks = [{constants.IDISK_SIZE: d.size,
10883
                  constants.IDISK_VG: d.logical_id[0]}
10884
                 for d in instance.disks]
10885
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10886
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10887

    
10888
    # hvparams processing
10889
    if self.op.hvparams:
10890
      hv_type = instance.hypervisor
10891
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10892
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10893
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10894

    
10895
      # local check
10896
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10897
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10898
      self.hv_proposed = self.hv_new = hv_new # the new actual values
10899
      self.hv_inst = i_hvdict # the new dict (without defaults)
10900
    else:
10901
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
10902
                                              instance.hvparams)
10903
      self.hv_new = self.hv_inst = {}
10904

    
10905
    # beparams processing
10906
    if self.op.beparams:
10907
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10908
                                   use_none=True)
10909
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10910
      be_new = cluster.SimpleFillBE(i_bedict)
10911
      self.be_proposed = self.be_new = be_new # the new actual values
10912
      self.be_inst = i_bedict # the new dict (without defaults)
10913
    else:
10914
      self.be_new = self.be_inst = {}
10915
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
10916
    be_old = cluster.FillBE(instance)
10917

    
10918
    # CPU param validation -- checking every time a parameter is
10919
    # changed to cover all cases where either CPU mask or vcpus have
10920
    # changed
10921
    if (constants.BE_VCPUS in self.be_proposed and
10922
        constants.HV_CPU_MASK in self.hv_proposed):
10923
      cpu_list = \
10924
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
10925
      # Verify mask is consistent with number of vCPUs. Can skip this
10926
      # test if only 1 entry in the CPU mask, which means same mask
10927
      # is applied to all vCPUs.
10928
      if (len(cpu_list) > 1 and
10929
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
10930
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
10931
                                   " CPU mask [%s]" %
10932
                                   (self.be_proposed[constants.BE_VCPUS],
10933
                                    self.hv_proposed[constants.HV_CPU_MASK]),
10934
                                   errors.ECODE_INVAL)
10935

    
10936
      # Only perform this test if a new CPU mask is given
10937
      if constants.HV_CPU_MASK in self.hv_new:
10938
        # Calculate the largest CPU number requested
10939
        max_requested_cpu = max(map(max, cpu_list))
10940
        # Check that all of the instance's nodes have enough physical CPUs to
10941
        # satisfy the requested CPU mask
10942
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
10943
                                max_requested_cpu + 1, instance.hypervisor)
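        # Illustrative example (not taken from a real config): a mask such as
        # "0-1:2:3" is expected to parse into [[0, 1], [2], [3]], giving
        # max_requested_cpu = 3, so every node would need at least 4 physical
        # CPUs for the mask to be satisfiable.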
10944

    
10945
    # osparams processing
10946
    if self.op.osparams:
10947
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10948
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10949
      self.os_inst = i_osdict # the new dict (without defaults)
10950
    else:
10951
      self.os_inst = {}
10952

    
10953
    self.warn = []
10954

    
10955
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10956
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10957
      mem_check_list = [pnode]
10958
      if be_new[constants.BE_AUTO_BALANCE]:
10959
        # either we changed auto_balance to yes or it was already set
10960
        mem_check_list.extend(instance.secondary_nodes)
10961
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10962
                                                  instance.hypervisor)
10963
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10964
                                         instance.hypervisor)
10965
      pninfo = nodeinfo[pnode]
10966
      msg = pninfo.fail_msg
10967
      if msg:
10968
        # Assume the primary node is unreachable and go ahead
10969
        self.warn.append("Can't get info from primary node %s: %s" %
10970
                         (pnode, msg))
10971
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10972
        self.warn.append("Node data from primary node %s doesn't contain"
10973
                         " free memory information" % pnode)
10974
      elif instance_info.fail_msg:
10975
        self.warn.append("Can't get instance runtime information: %s" %
10976
                        instance_info.fail_msg)
10977
      else:
10978
        if instance_info.payload:
10979
          current_mem = int(instance_info.payload["memory"])
10980
        else:
10981
          # Assume instance not running
10982
          # (there is a slight race condition here, but it's not very probable,
10983
          # and we have no other way to check)
10984
          current_mem = 0
10985
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10986
                    pninfo.payload["memory_free"])
10987
        if miss_mem > 0:
10988
          raise errors.OpPrereqError("This change will prevent the instance"
10989
                                     " from starting, due to %d MB of memory"
10990
                                     " missing on its primary node" % miss_mem,
10991
                                     errors.ECODE_NORES)
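        # Rough arithmetic behind the check above (illustrative numbers):
        # raising memory to 2048 MB while the instance currently uses 1024 MB
        # and the primary node reports 512 MB free gives
        # miss_mem = 2048 - 1024 - 512 = 512 > 0, so the change is refused.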
10992

    
10993
      if be_new[constants.BE_AUTO_BALANCE]:
10994
        for node, nres in nodeinfo.items():
10995
          if node not in instance.secondary_nodes:
10996
            continue
10997
          nres.Raise("Can't get info from secondary node %s" % node,
10998
                     prereq=True, ecode=errors.ECODE_STATE)
10999
          if not isinstance(nres.payload.get("memory_free", None), int):
11000
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11001
                                       " memory information" % node,
11002
                                       errors.ECODE_STATE)
11003
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11004
            raise errors.OpPrereqError("This change will prevent the instance"
11005
                                       " from failover to its secondary node"
11006
                                       " %s, due to not enough memory" % node,
11007
                                       errors.ECODE_STATE)
11008

    
11009
    # NIC processing
11010
    self.nic_pnew = {}
11011
    self.nic_pinst = {}
11012
    for nic_op, nic_dict in self.op.nics:
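      # nic_op is either constants.DDM_ADD, constants.DDM_REMOVE or the
      # integer index of an existing NIC; nic_dict carries the requested
      # parameter changes for that NIC.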
11013
      if nic_op == constants.DDM_REMOVE:
11014
        if not instance.nics:
11015
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11016
                                     errors.ECODE_INVAL)
11017
        continue
11018
      if nic_op != constants.DDM_ADD:
11019
        # an existing nic
11020
        if not instance.nics:
11021
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11022
                                     " no NICs" % nic_op,
11023
                                     errors.ECODE_INVAL)
11024
        if nic_op < 0 or nic_op >= len(instance.nics):
11025
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11026
                                     " are 0 to %d" %
11027
                                     (nic_op, len(instance.nics) - 1),
11028
                                     errors.ECODE_INVAL)
11029
        old_nic_params = instance.nics[nic_op].nicparams
11030
        old_nic_ip = instance.nics[nic_op].ip
11031
      else:
11032
        old_nic_params = {}
11033
        old_nic_ip = None
11034

    
11035
      update_params_dict = dict([(key, nic_dict[key])
11036
                                 for key in constants.NICS_PARAMETERS
11037
                                 if key in nic_dict])
11038

    
11039
      if "bridge" in nic_dict:
11040
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11041

    
11042
      new_nic_params = _GetUpdatedParams(old_nic_params,
11043
                                         update_params_dict)
11044
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11045
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11046
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11047
      self.nic_pinst[nic_op] = new_nic_params
11048
      self.nic_pnew[nic_op] = new_filled_nic_params
11049
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11050

    
11051
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11052
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11053
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11054
        if msg:
11055
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11056
          if self.op.force:
11057
            self.warn.append(msg)
11058
          else:
11059
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11060
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11061
        if constants.INIC_IP in nic_dict:
11062
          nic_ip = nic_dict[constants.INIC_IP]
11063
        else:
11064
          nic_ip = old_nic_ip
11065
        if nic_ip is None:
11066
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11067
                                     " on a routed nic", errors.ECODE_INVAL)
11068
      if constants.INIC_MAC in nic_dict:
11069
        nic_mac = nic_dict[constants.INIC_MAC]
11070
        if nic_mac is None:
11071
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11072
                                     errors.ECODE_INVAL)
11073
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11074
          # otherwise generate the mac
11075
          nic_dict[constants.INIC_MAC] = \
11076
            self.cfg.GenerateMAC(self.proc.GetECId())
11077
        else:
11078
          # or validate/reserve the current one
11079
          try:
11080
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11081
          except errors.ReservationError:
11082
            raise errors.OpPrereqError("MAC address %s already in use"
11083
                                       " in cluster" % nic_mac,
11084
                                       errors.ECODE_NOTUNIQUE)
11085

    
11086
    # DISK processing
11087
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11088
      raise errors.OpPrereqError("Disk operations not supported for"
11089
                                 " diskless instances",
11090
                                 errors.ECODE_INVAL)
11091
    for disk_op, _ in self.op.disks:
11092
      if disk_op == constants.DDM_REMOVE:
11093
        if len(instance.disks) == 1:
11094
          raise errors.OpPrereqError("Cannot remove the last disk of"
11095
                                     " an instance", errors.ECODE_INVAL)
11096
        _CheckInstanceDown(self, instance, "cannot remove disks")
11097

    
11098
      if (disk_op == constants.DDM_ADD and
11099
          len(instance.disks) >= constants.MAX_DISKS):
11100
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11101
                                   " add more" % constants.MAX_DISKS,
11102
                                   errors.ECODE_STATE)
11103
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11104
        # an existing disk
11105
        if disk_op < 0 or disk_op >= len(instance.disks):
11106
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11107
                                     " are 0 to %d" %
11108
                                     (disk_op, len(instance.disks) - 1),
11109
                                     errors.ECODE_INVAL)
11110

    
11111
    return
11112

    
11113
  def _ConvertPlainToDrbd(self, feedback_fn):
11114
    """Converts an instance from plain to drbd.
11115

11116
    """
11117
    feedback_fn("Converting template to drbd")
11118
    instance = self.instance
11119
    pnode = instance.primary_node
11120
    snode = self.op.remote_node
11121

    
11122
    # create a fake disk info for _GenerateDiskTemplate
11123
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11124
                  constants.IDISK_VG: d.logical_id[0]}
11125
                 for d in instance.disks]
11126
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11127
                                      instance.name, pnode, [snode],
11128
                                      disk_info, None, None, 0, feedback_fn)
11129
    info = _GetInstanceInfoText(instance)
11130
    feedback_fn("Creating aditional volumes...")
11131
    # first, create the missing data and meta devices
11132
    for disk in new_disks:
11133
      # unfortunately this is... not too nice
11134
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11135
                            info, True)
11136
      for child in disk.children:
11137
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11138
    # at this stage, all new LVs have been created, we can rename the
11139
    # old ones
11140
    feedback_fn("Renaming original volumes...")
11141
    rename_list = [(o, n.children[0].logical_id)
11142
                   for (o, n) in zip(instance.disks, new_disks)]
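    # rename_list pairs each existing plain LV with the logical_id of the
    # data child (children[0]) of its new DRBD disk, so the existing data
    # volume simply takes over the name expected by the new template.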
11143
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11144
    result.Raise("Failed to rename original LVs")
11145

    
11146
    feedback_fn("Initializing DRBD devices...")
11147
    # all child devices are in place, we can now create the DRBD devices
11148
    for disk in new_disks:
11149
      for node in [pnode, snode]:
11150
        f_create = node == pnode
11151
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11152

    
11153
    # at this point, the instance has been modified
11154
    instance.disk_template = constants.DT_DRBD8
11155
    instance.disks = new_disks
11156
    self.cfg.Update(instance, feedback_fn)
11157

    
11158
    # disks are created, waiting for sync
11159
    disk_abort = not _WaitForSync(self, instance,
11160
                                  oneshot=not self.op.wait_for_sync)
11161
    if disk_abort:
11162
      raise errors.OpExecError("There are some degraded disks for"
11163
                               " this instance, please cleanup manually")
11164

    
11165
  def _ConvertDrbdToPlain(self, feedback_fn):
11166
    """Converts an instance from drbd to plain.
11167

11168
    """
11169
    instance = self.instance
11170
    assert len(instance.secondary_nodes) == 1
11171
    pnode = instance.primary_node
11172
    snode = instance.secondary_nodes[0]
11173
    feedback_fn("Converting template to plain")
11174

    
11175
    old_disks = instance.disks
11176
    new_disks = [d.children[0] for d in old_disks]
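    # For DRBD8 disks, children[0] is the data LV and children[1] the
    # metadata LV; keeping only the former yields the plain layout, while the
    # metadata volumes are removed further down.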
11177

    
11178
    # copy over size and mode
11179
    for parent, child in zip(old_disks, new_disks):
11180
      child.size = parent.size
11181
      child.mode = parent.mode
11182

    
11183
    # update instance structure
11184
    instance.disks = new_disks
11185
    instance.disk_template = constants.DT_PLAIN
11186
    self.cfg.Update(instance, feedback_fn)
11187

    
11188
    feedback_fn("Removing volumes on the secondary node...")
11189
    for disk in old_disks:
11190
      self.cfg.SetDiskID(disk, snode)
11191
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11192
      if msg:
11193
        self.LogWarning("Could not remove block device %s on node %s,"
11194
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11195

    
11196
    feedback_fn("Removing unneeded volumes on the primary node...")
11197
    for idx, disk in enumerate(old_disks):
11198
      meta = disk.children[1]
11199
      self.cfg.SetDiskID(meta, pnode)
11200
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11201
      if msg:
11202
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11203
                        " continuing anyway: %s", idx, pnode, msg)
11204

    
11205
  def Exec(self, feedback_fn):
11206
    """Modifies an instance.
11207

11208
    All parameters take effect only at the next restart of the instance.
11209

11210
    """
11211
    # Process here the warnings from CheckPrereq, as we don't have a
11212
    # feedback_fn there.
11213
    for warn in self.warn:
11214
      feedback_fn("WARNING: %s" % warn)
11215

    
11216
    result = []
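    # "result" collects (parameter, change) pairs describing what was applied,
    # e.g. ("disk/1", "remove") or ("be/memory", 512) (illustrative values);
    # it is returned to the caller at the end of Exec.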
11217
    instance = self.instance
11218
    # disk changes
11219
    for disk_op, disk_dict in self.op.disks:
11220
      if disk_op == constants.DDM_REMOVE:
11221
        # remove the last disk
11222
        device = instance.disks.pop()
11223
        device_idx = len(instance.disks)
11224
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11225
          self.cfg.SetDiskID(disk, node)
11226
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11227
          if msg:
11228
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11229
                            " continuing anyway", device_idx, node, msg)
11230
        result.append(("disk/%d" % device_idx, "remove"))
11231
      elif disk_op == constants.DDM_ADD:
11232
        # add a new disk
11233
        if instance.disk_template in (constants.DT_FILE,
11234
                                        constants.DT_SHARED_FILE):
11235
          file_driver, file_path = instance.disks[0].logical_id
11236
          file_path = os.path.dirname(file_path)
11237
        else:
11238
          file_driver = file_path = None
11239
        disk_idx_base = len(instance.disks)
11240
        new_disk = _GenerateDiskTemplate(self,
11241
                                         instance.disk_template,
11242
                                         instance.name, instance.primary_node,
11243
                                         instance.secondary_nodes,
11244
                                         [disk_dict],
11245
                                         file_path,
11246
                                         file_driver,
11247
                                         disk_idx_base, feedback_fn)[0]
11248
        instance.disks.append(new_disk)
11249
        info = _GetInstanceInfoText(instance)
11250

    
11251
        logging.info("Creating volume %s for instance %s",
11252
                     new_disk.iv_name, instance.name)
11253
        # Note: this needs to be kept in sync with _CreateDisks
11254
        # HARDCODE
11255
        for node in instance.all_nodes:
11256
          f_create = node == instance.primary_node
11257
          try:
11258
            _CreateBlockDev(self, node, instance, new_disk,
11259
                            f_create, info, f_create)
11260
          except errors.OpExecError, err:
11261
            self.LogWarning("Failed to create volume %s (%s) on"
11262
                            " node %s: %s",
11263
                            new_disk.iv_name, new_disk, node, err)
11264
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11265
                       (new_disk.size, new_disk.mode)))
11266
      else:
11267
        # change a given disk
11268
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11269
        result.append(("disk.mode/%d" % disk_op,
11270
                       disk_dict[constants.IDISK_MODE]))
11271

    
11272
    if self.op.disk_template:
11273
      r_shut = _ShutdownInstanceDisks(self, instance)
11274
      if not r_shut:
11275
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11276
                                 " proceed with disk template conversion")
11277
      mode = (instance.disk_template, self.op.disk_template)
11278
      try:
11279
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11280
      except:
11281
        self.cfg.ReleaseDRBDMinors(instance.name)
11282
        raise
11283
      result.append(("disk_template", self.op.disk_template))
11284

    
11285
    # NIC changes
11286
    for nic_op, nic_dict in self.op.nics:
11287
      if nic_op == constants.DDM_REMOVE:
11288
        # remove the last nic
11289
        del instance.nics[-1]
11290
        result.append(("nic.%d" % len(instance.nics), "remove"))
11291
      elif nic_op == constants.DDM_ADD:
11292
        # mac and bridge should be set, by now
11293
        mac = nic_dict[constants.INIC_MAC]
11294
        ip = nic_dict.get(constants.INIC_IP, None)
11295
        nicparams = self.nic_pinst[constants.DDM_ADD]
11296
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11297
        instance.nics.append(new_nic)
11298
        result.append(("nic.%d" % (len(instance.nics) - 1),
11299
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11300
                       (new_nic.mac, new_nic.ip,
11301
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11302
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11303
                       )))
11304
      else:
11305
        for key in (constants.INIC_MAC, constants.INIC_IP):
11306
          if key in nic_dict:
11307
            setattr(instance.nics[nic_op], key, nic_dict[key])
11308
        if nic_op in self.nic_pinst:
11309
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11310
        for key, val in nic_dict.iteritems():
11311
          result.append(("nic.%s/%d" % (key, nic_op), val))
11312

    
11313
    # hvparams changes
11314
    if self.op.hvparams:
11315
      instance.hvparams = self.hv_inst
11316
      for key, val in self.op.hvparams.iteritems():
11317
        result.append(("hv/%s" % key, val))
11318

    
11319
    # beparams changes
11320
    if self.op.beparams:
11321
      instance.beparams = self.be_inst
11322
      for key, val in self.op.beparams.iteritems():
11323
        result.append(("be/%s" % key, val))
11324

    
11325
    # OS change
11326
    if self.op.os_name:
11327
      instance.os = self.op.os_name
11328

    
11329
    # osparams changes
11330
    if self.op.osparams:
11331
      instance.osparams = self.os_inst
11332
      for key, val in self.op.osparams.iteritems():
11333
        result.append(("os/%s" % key, val))
11334

    
11335
    self.cfg.Update(instance, feedback_fn)
11336

    
11337
    return result
11338

    
11339
  _DISK_CONVERSIONS = {
11340
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11341
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11342
    }
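  # Only the plain <-> drbd conversions are supported: CheckPrereq rejects any
  # (old, new) pair that is not a key of this mapping, and Exec dispatches to
  # the matching _Convert* method via self._DISK_CONVERSIONS[mode].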
11343

    
11344

    
11345
class LUInstanceChangeGroup(LogicalUnit):
11346
  HPATH = "instance-change-group"
11347
  HTYPE = constants.HTYPE_INSTANCE
11348
  REQ_BGL = False
11349

    
11350
  def ExpandNames(self):
11351
    self.share_locks = _ShareAll()
11352
    self.needed_locks = {
11353
      locking.LEVEL_NODEGROUP: [],
11354
      locking.LEVEL_NODE: [],
11355
      }
11356

    
11357
    self._ExpandAndLockInstance()
11358

    
11359
    if self.op.target_groups:
11360
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11361
                                  self.op.target_groups)
11362
    else:
11363
      self.req_target_uuids = None
11364

    
11365
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11366

    
11367
  def DeclareLocks(self, level):
11368
    if level == locking.LEVEL_NODEGROUP:
11369
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11370

    
11371
      if self.req_target_uuids:
11372
        lock_groups = set(self.req_target_uuids)
11373

    
11374
        # Lock all groups used by instance optimistically; this requires going
11375
        # via the node before it's locked, requiring verification later on
11376
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11377
        lock_groups.update(instance_groups)
11378
      else:
11379
        # No target groups, need to lock all of them
11380
        lock_groups = locking.ALL_SET
11381

    
11382
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11383

    
11384
    elif level == locking.LEVEL_NODE:
11385
      if self.req_target_uuids:
11386
        # Lock all nodes used by instances
11387
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11388
        self._LockInstancesNodes()
11389

    
11390
        # Lock all nodes in all potential target groups
11391
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11392
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11393
        member_nodes = [node_name
11394
                        for group in lock_groups
11395
                        for node_name in self.cfg.GetNodeGroup(group).members]
11396
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11397
      else:
11398
        # Lock all nodes as all groups are potential targets
11399
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11400

    
11401
  def CheckPrereq(self):
11402
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11403
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11404
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11405

    
11406
    assert (self.req_target_uuids is None or
11407
            owned_groups.issuperset(self.req_target_uuids))
11408
    assert owned_instances == set([self.op.instance_name])
11409

    
11410
    # Get instance information
11411
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11412

    
11413
    # Check if node groups for locked instance are still correct
11414
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11415
      ("Instance %s's nodes changed while we kept the lock" %
11416
       self.op.instance_name)
11417

    
11418
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11419
                                           owned_groups)
11420

    
11421
    if self.req_target_uuids:
11422
      # User requested specific target groups
11423
      self.target_uuids = self.req_target_uuids
11424
    else:
11425
      # All groups except those used by the instance are potential targets
11426
      self.target_uuids = owned_groups - inst_groups
11427

    
11428
    conflicting_groups = self.target_uuids & inst_groups
11429
    if conflicting_groups:
11430
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11431
                                 " used by the instance '%s'" %
11432
                                 (utils.CommaJoin(conflicting_groups),
11433
                                  self.op.instance_name),
11434
                                 errors.ECODE_INVAL)
11435

    
11436
    if not self.target_uuids:
11437
      raise errors.OpPrereqError("There are no possible target groups",
11438
                                 errors.ECODE_INVAL)
11439

    
11440
  def BuildHooksEnv(self):
11441
    """Build hooks env.
11442

11443
    """
11444
    assert self.target_uuids
11445

    
11446
    env = {
11447
      "TARGET_GROUPS": " ".join(self.target_uuids),
11448
      }
11449

    
11450
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11451

    
11452
    return env
11453

    
11454
  def BuildHooksNodes(self):
11455
    """Build hooks nodes.
11456

11457
    """
11458
    mn = self.cfg.GetMasterNode()
11459
    return ([mn], [mn])
11460

    
11461
  def Exec(self, feedback_fn):
11462
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11463

    
11464
    assert instances == [self.op.instance_name], "Instance not locked"
11465

    
11466
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11467
                     instances=instances, target_groups=list(self.target_uuids))
11468

    
11469
    ial.Run(self.op.iallocator)
11470

    
11471
    if not ial.success:
11472
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11473
                                 " instance '%s' using iallocator '%s': %s" %
11474
                                 (self.op.instance_name, self.op.iallocator,
11475
                                  ial.info),
11476
                                 errors.ECODE_NORES)
11477

    
11478
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11479

    
11480
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11481
                 " instance '%s'", len(jobs), self.op.instance_name)
11482

    
11483
    return ResultWithJobs(jobs)
11484

    
11485

    
11486
class LUBackupQuery(NoHooksLU):
11487
  """Query the exports list
11488

11489
  """
11490
  REQ_BGL = False
11491

    
11492
  def ExpandNames(self):
11493
    self.needed_locks = {}
11494
    self.share_locks[locking.LEVEL_NODE] = 1
11495
    if not self.op.nodes:
11496
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11497
    else:
11498
      self.needed_locks[locking.LEVEL_NODE] = \
11499
        _GetWantedNodes(self, self.op.nodes)
11500

    
11501
  def Exec(self, feedback_fn):
11502
    """Compute the list of all the exported system images.
11503

11504
    @rtype: dict
11505
    @return: a dictionary with the structure node->(export-list)
11506
        where export-list is a list of the instances exported on
11507
        that node.
11508

11509
    """
11510
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11511
    rpcresult = self.rpc.call_export_list(self.nodes)
11512
    result = {}
11513
    for node in rpcresult:
11514
      if rpcresult[node].fail_msg:
11515
        result[node] = False
11516
      else:
11517
        result[node] = rpcresult[node].payload
11518

    
11519
    return result
11520

    
11521

    
11522
class LUBackupPrepare(NoHooksLU):
11523
  """Prepares an instance for an export and returns useful information.
11524

11525
  """
11526
  REQ_BGL = False
11527

    
11528
  def ExpandNames(self):
11529
    self._ExpandAndLockInstance()
11530

    
11531
  def CheckPrereq(self):
11532
    """Check prerequisites.
11533

11534
    """
11535
    instance_name = self.op.instance_name
11536

    
11537
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11538
    assert self.instance is not None, \
11539
          "Cannot retrieve locked instance %s" % self.op.instance_name
11540
    _CheckNodeOnline(self, self.instance.primary_node)
11541

    
11542
    self._cds = _GetClusterDomainSecret()
11543

    
11544
  def Exec(self, feedback_fn):
11545
    """Prepares an instance for an export.
11546

11547
    """
11548
    instance = self.instance
11549

    
11550
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11551
      salt = utils.GenerateSecret(8)
11552

    
11553
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11554
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11555
                                              constants.RIE_CERT_VALIDITY)
11556
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11557

    
11558
      (name, cert_pem) = result.payload
11559

    
11560
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11561
                                             cert_pem)
11562

    
11563
      return {
11564
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11565
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11566
                          salt),
11567
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11568
        }
11569

    
11570
    return None
11571

    
11572

    
11573
class LUBackupExport(LogicalUnit):
11574
  """Export an instance to an image in the cluster.
11575

11576
  """
11577
  HPATH = "instance-export"
11578
  HTYPE = constants.HTYPE_INSTANCE
11579
  REQ_BGL = False
11580

    
11581
  def CheckArguments(self):
11582
    """Check the arguments.
11583

11584
    """
11585
    self.x509_key_name = self.op.x509_key_name
11586
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11587

    
11588
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11589
      if not self.x509_key_name:
11590
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11591
                                   errors.ECODE_INVAL)
11592

    
11593
      if not self.dest_x509_ca_pem:
11594
        raise errors.OpPrereqError("Missing destination X509 CA",
11595
                                   errors.ECODE_INVAL)
11596

    
11597
  def ExpandNames(self):
11598
    self._ExpandAndLockInstance()
11599

    
11600
    # Lock all nodes for local exports
11601
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11602
      # FIXME: lock only instance primary and destination node
11603
      #
11604
      # Sad but true, for now we have to lock all nodes, as we don't know where
11605
      # the previous export might be, and in this LU we search for it and
11606
      # remove it from its current node. In the future we could fix this by:
11607
      #  - making a tasklet to search (share-lock all), then create the
11608
      #    new one, then one to remove, after
11609
      #  - removing the removal operation altogether
11610
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11611

    
11612
  def DeclareLocks(self, level):
11613
    """Last minute lock declaration."""
11614
    # All nodes are locked anyway, so nothing to do here.
11615

    
11616
  def BuildHooksEnv(self):
11617
    """Build hooks env.
11618

11619
    This will run on the master, primary node and target node.
11620

11621
    """
11622
    env = {
11623
      "EXPORT_MODE": self.op.mode,
11624
      "EXPORT_NODE": self.op.target_node,
11625
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11626
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11627
      # TODO: Generic function for boolean env variables
11628
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11629
      }
11630

    
11631
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11632

    
11633
    return env
11634

    
11635
  def BuildHooksNodes(self):
11636
    """Build hooks nodes.
11637

11638
    """
11639
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11640

    
11641
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11642
      nl.append(self.op.target_node)
11643

    
11644
    return (nl, nl)
11645

    
11646
  def CheckPrereq(self):
11647
    """Check prerequisites.
11648

11649
    This checks that the instance and node names are valid.
11650

11651
    """
11652
    instance_name = self.op.instance_name
11653

    
11654
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11655
    assert self.instance is not None, \
11656
          "Cannot retrieve locked instance %s" % self.op.instance_name
11657
    _CheckNodeOnline(self, self.instance.primary_node)
11658

    
11659
    if (self.op.remove_instance and self.instance.admin_up and
11660
        not self.op.shutdown):
11661
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11662
                                 " down before")
11663

    
11664
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11665
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11666
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11667
      assert self.dst_node is not None
11668

    
11669
      _CheckNodeOnline(self, self.dst_node.name)
11670
      _CheckNodeNotDrained(self, self.dst_node.name)
11671

    
11672
      self._cds = None
11673
      self.dest_disk_info = None
11674
      self.dest_x509_ca = None
11675

    
11676
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11677
      self.dst_node = None
11678

    
11679
      if len(self.op.target_node) != len(self.instance.disks):
11680
        raise errors.OpPrereqError(("Received destination information for %s"
11681
                                    " disks, but instance %s has %s disks") %
11682
                                   (len(self.op.target_node), instance_name,
11683
                                    len(self.instance.disks)),
11684
                                   errors.ECODE_INVAL)
11685

    
11686
      cds = _GetClusterDomainSecret()
11687

    
11688
      # Check X509 key name
11689
      try:
11690
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11691
      except (TypeError, ValueError), err:
11692
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11693

    
11694
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11695
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11696
                                   errors.ECODE_INVAL)
11697

    
11698
      # Load and verify CA
11699
      try:
11700
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11701
      except OpenSSL.crypto.Error, err:
11702
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11703
                                   (err, ), errors.ECODE_INVAL)
11704

    
11705
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11706
      if errcode is not None:
11707
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11708
                                   (msg, ), errors.ECODE_INVAL)
11709

    
11710
      self.dest_x509_ca = cert
11711

    
11712
      # Verify target information
11713
      disk_info = []
11714
      for idx, disk_data in enumerate(self.op.target_node):
11715
        try:
11716
          (host, port, magic) = \
11717
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11718
        except errors.GenericError, err:
11719
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11720
                                     (idx, err), errors.ECODE_INVAL)
11721

    
11722
        disk_info.append((host, port, magic))
11723

    
11724
      assert len(disk_info) == len(self.op.target_node)
11725
      self.dest_disk_info = disk_info
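      # Each entry in dest_disk_info is the (host, port, magic) tuple returned
      # by CheckRemoteExportDiskInfo above, one per instance disk; it is later
      # handed to helper.RemoteExport in Exec.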
11726

    
11727
    else:
11728
      raise errors.ProgrammerError("Unhandled export mode %r" %
11729
                                   self.op.mode)
11730

    
11731
    # instance disk type verification
11732
    # TODO: Implement export support for file-based disks
11733
    for disk in self.instance.disks:
11734
      if disk.dev_type == constants.LD_FILE:
11735
        raise errors.OpPrereqError("Export not supported for instances with"
11736
                                   " file-based disks", errors.ECODE_INVAL)
11737

    
11738
  def _CleanupExports(self, feedback_fn):
11739
    """Removes exports of current instance from all other nodes.
11740

11741
    If an instance in a cluster with nodes A..D was exported to node C, its
11742
    exports will be removed from the nodes A, B and D.
11743

11744
    """
11745
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11746

    
11747
    nodelist = self.cfg.GetNodeList()
11748
    nodelist.remove(self.dst_node.name)
11749

    
11750
    # on one-node clusters nodelist will be empty after the removal;
11751
    # if we proceeded, the backup would be removed because OpBackupQuery
11752
    # substitutes an empty list with the full cluster node list.
11753
    iname = self.instance.name
11754
    if nodelist:
11755
      feedback_fn("Removing old exports for instance %s" % iname)
11756
      exportlist = self.rpc.call_export_list(nodelist)
11757
      for node in exportlist:
11758
        if exportlist[node].fail_msg:
11759
          continue
11760
        if iname in exportlist[node].payload:
11761
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11762
          if msg:
11763
            self.LogWarning("Could not remove older export for instance %s"
11764
                            " on node %s: %s", iname, node, msg)
11765

    
11766
  def Exec(self, feedback_fn):
11767
    """Export an instance to an image in the cluster.
11768

11769
    """
11770
    assert self.op.mode in constants.EXPORT_MODES
11771

    
11772
    instance = self.instance
11773
    src_node = instance.primary_node
11774

    
11775
    if self.op.shutdown:
11776
      # shutdown the instance, but not the disks
11777
      feedback_fn("Shutting down instance %s" % instance.name)
11778
      result = self.rpc.call_instance_shutdown(src_node, instance,
11779
                                               self.op.shutdown_timeout)
11780
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11781
      result.Raise("Could not shutdown instance %s on"
11782
                   " node %s" % (instance.name, src_node))
11783

    
11784
    # set the disks ID correctly since call_instance_start needs the
11785
    # correct drbd minor to create the symlinks
11786
    for disk in instance.disks:
11787
      self.cfg.SetDiskID(disk, src_node)
11788

    
11789
    activate_disks = (not instance.admin_up)
11790

    
11791
    if activate_disks:
11792
      # Activate the instance disks if we're exporting a stopped instance
11793
      feedback_fn("Activating disks for %s" % instance.name)
11794
      _StartInstanceDisks(self, instance, None)
11795

    
11796
    try:
11797
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11798
                                                     instance)
11799

    
11800
      helper.CreateSnapshots()
11801
      try:
11802
        if (self.op.shutdown and instance.admin_up and
11803
            not self.op.remove_instance):
11804
          assert not activate_disks
11805
          feedback_fn("Starting instance %s" % instance.name)
11806
          result = self.rpc.call_instance_start(src_node, instance,
11807
                                                None, None, False)
11808
          msg = result.fail_msg
11809
          if msg:
11810
            feedback_fn("Failed to start instance: %s" % msg)
11811
            _ShutdownInstanceDisks(self, instance)
11812
            raise errors.OpExecError("Could not start instance: %s" % msg)
11813

    
11814
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11815
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11816
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11817
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11818
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11819

    
11820
          (key_name, _, _) = self.x509_key_name
11821

    
11822
          dest_ca_pem = \
11823
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11824
                                            self.dest_x509_ca)
11825

    
11826
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11827
                                                     key_name, dest_ca_pem,
11828
                                                     timeouts)
11829
      finally:
11830
        helper.Cleanup()
11831

    
11832
      # Check for backwards compatibility
11833
      assert len(dresults) == len(instance.disks)
11834
      assert compat.all(isinstance(i, bool) for i in dresults), \
11835
             "Not all results are boolean: %r" % dresults
11836

    
11837
    finally:
11838
      if activate_disks:
11839
        feedback_fn("Deactivating disks for %s" % instance.name)
11840
        _ShutdownInstanceDisks(self, instance)
11841

    
11842
    if not (compat.all(dresults) and fin_resu):
11843
      failures = []
11844
      if not fin_resu:
11845
        failures.append("export finalization")
11846
      if not compat.all(dresults):
11847
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11848
                               if not dsk)
11849
        failures.append("disk export: disk(s) %s" % fdsk)
11850

    
11851
      raise errors.OpExecError("Export failed, errors in %s" %
11852
                               utils.CommaJoin(failures))
11853

    
11854
    # At this point, the export was successful, we can cleanup/finish
11855

    
11856
    # Remove instance if requested
11857
    if self.op.remove_instance:
11858
      feedback_fn("Removing instance %s" % instance.name)
11859
      _RemoveInstance(self, feedback_fn, instance,
11860
                      self.op.ignore_remove_failures)
11861

    
11862
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11863
      self._CleanupExports(feedback_fn)
11864

    
11865
    return fin_resu, dresults
11866

    
11867

    
11868
class LUBackupRemove(NoHooksLU):
11869
  """Remove exports related to the named instance.
11870

11871
  """
11872
  REQ_BGL = False
11873

    
11874
  def ExpandNames(self):
11875
    self.needed_locks = {}
11876
    # We need all nodes to be locked in order for RemoveExport to work, but we
11877
    # don't need to lock the instance itself, as nothing will happen to it (and
11878
    # we can remove exports also for a removed instance)
11879
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11880

    
11881
  def Exec(self, feedback_fn):
11882
    """Remove any export.
11883

11884
    """
11885
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11886
    # If the instance was not found we'll try with the name that was passed in.
11887
    # This will only work if it was an FQDN, though.
11888
    fqdn_warn = False
11889
    if not instance_name:
11890
      fqdn_warn = True
11891
      instance_name = self.op.instance_name
11892

    
11893
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11894
    exportlist = self.rpc.call_export_list(locked_nodes)
11895
    found = False
11896
    for node in exportlist:
11897
      msg = exportlist[node].fail_msg
11898
      if msg:
11899
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11900
        continue
11901
      if instance_name in exportlist[node].payload:
11902
        found = True
11903
        result = self.rpc.call_export_remove(node, instance_name)
11904
        msg = result.fail_msg
11905
        if msg:
11906
          logging.error("Could not remove export for instance %s"
11907
                        " on node %s: %s", instance_name, node, msg)
11908

    
11909
    if fqdn_warn and not found:
11910
      feedback_fn("Export not found. If trying to remove an export belonging"
11911
                  " to a deleted instance please use its Fully Qualified"
11912
                  " Domain Name.")
11913

    
11914

    
11915
class LUGroupAdd(LogicalUnit):
11916
  """Logical unit for creating node groups.
11917

11918
  """
11919
  HPATH = "group-add"
11920
  HTYPE = constants.HTYPE_GROUP
11921
  REQ_BGL = False
11922

    
11923
  def ExpandNames(self):
11924
    # We need the new group's UUID here so that we can create and acquire the
11925
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11926
    # that it should not check whether the UUID exists in the configuration.
11927
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11928
    self.needed_locks = {}
11929
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11930

    
11931
  def CheckPrereq(self):
11932
    """Check prerequisites.
11933

11934
    This checks that the given group name is not an existing node group
11935
    already.
11936

11937
    """
11938
    try:
11939
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11940
    except errors.OpPrereqError:
11941
      pass
11942
    else:
11943
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11944
                                 " node group (UUID: %s)" %
11945
                                 (self.op.group_name, existing_uuid),
11946
                                 errors.ECODE_EXISTS)
11947

    
11948
    if self.op.ndparams:
11949
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11950

    
11951
  def BuildHooksEnv(self):
11952
    """Build hooks env.
11953

11954
    """
11955
    return {
11956
      "GROUP_NAME": self.op.group_name,
11957
      }
11958

    
11959
  def BuildHooksNodes(self):
11960
    """Build hooks nodes.
11961

11962
    """
11963
    mn = self.cfg.GetMasterNode()
11964
    return ([mn], [mn])
11965

    
11966
  def Exec(self, feedback_fn):
11967
    """Add the node group to the cluster.
11968

11969
    """
11970
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11971
                                  uuid=self.group_uuid,
11972
                                  alloc_policy=self.op.alloc_policy,
11973
                                  ndparams=self.op.ndparams)
11974

    
11975
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11976
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11977

    
11978

    
11979
class LUGroupAssignNodes(NoHooksLU):
11980
  """Logical unit for assigning nodes to groups.
11981

11982
  """
11983
  REQ_BGL = False
11984

    
11985
  def ExpandNames(self):
11986
    # These raise errors.OpPrereqError on their own:
11987
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11988
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11989

    
11990
    # We want to lock all the affected nodes and groups. We have readily
11991
    # available the list of nodes, and the *destination* group. To gather the
11992
    # list of "source" groups, we need to fetch node information later on.
11993
    self.needed_locks = {
11994
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11995
      locking.LEVEL_NODE: self.op.nodes,
11996
      }
11997

    
11998
  def DeclareLocks(self, level):
11999
    if level == locking.LEVEL_NODEGROUP:
12000
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12001

    
12002
      # Try to get all affected nodes' groups without having the group or node
12003
      # lock yet. Needs verification later in the code flow.
12004
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12005

    
12006
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12007

    
12008
  def CheckPrereq(self):
12009
    """Check prerequisites.
12010

12011
    """
12012
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12013
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12014
            frozenset(self.op.nodes))
12015

    
12016
    expected_locks = (set([self.group_uuid]) |
12017
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12018
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12019
    if actual_locks != expected_locks:
12020
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12021
                               " current groups are '%s', used to be '%s'" %
12022
                               (utils.CommaJoin(expected_locks),
12023
                                utils.CommaJoin(actual_locks)))
12024

    
12025
    self.node_data = self.cfg.GetAllNodesInfo()
12026
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12027
    instance_data = self.cfg.GetAllInstancesInfo()
12028

    
12029
    if self.group is None:
12030
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12031
                               (self.op.group_name, self.group_uuid))
12032

    
12033
    (new_splits, previous_splits) = \
12034
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12035
                                             for node in self.op.nodes],
12036
                                            self.node_data, instance_data)
12037

    
12038
    if new_splits:
12039
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12040

    
12041
      if not self.op.force:
12042
        raise errors.OpExecError("The following instances get split by this"
12043
                                 " change and --force was not given: %s" %
12044
                                 fmt_new_splits)
12045
      else:
12046
        self.LogWarning("This operation will split the following instances: %s",
12047
                        fmt_new_splits)
12048

    
12049
        if previous_splits:
12050
          self.LogWarning("In addition, these already-split instances continue"
12051
                          " to be split across groups: %s",
12052
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12053

    
12054
  def Exec(self, feedback_fn):
12055
    """Assign nodes to a new group.
12056

12057
    """
12058
    for node in self.op.nodes:
12059
      self.node_data[node].group = self.group_uuid
12060

    
12061
    # FIXME: Depends on side-effects of modifying the result of
12062
    # C{cfg.GetAllNodesInfo}
12063

    
12064
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12065

    
12066
  @staticmethod
12067
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12068
    """Check for split instances after a node assignment.
12069

12070
    This method considers a series of node assignments as an atomic operation,
12071
    and returns information about split instances after applying the set of
12072
    changes.
12073

12074
    In particular, it returns information about newly split instances, and
12075
    instances that were already split, and remain so after the change.
12076

12077
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12078
    considered.
12079

12080
    @type changes: list of (node_name, new_group_uuid) pairs.
12081
    @param changes: list of node assignments to consider.
12082
    @param node_data: a dict with data for all nodes
12083
    @param instance_data: a dict with all instances to consider
12084
    @rtype: a two-tuple
12085
    @return: a list of instances that were previously okay and end up split as a
12086
      consequence of this change, and a list of instances that were previously
12087
      split and this change does not fix.
12088

12089
    """
12090
    changed_nodes = dict((node, group) for node, group in changes
12091
                         if node_data[node].group != group)
12092

    
12093
    all_split_instances = set()
12094
    previously_split_instances = set()
12095

    
12096
    def InstanceNodes(instance):
12097
      return [instance.primary_node] + list(instance.secondary_nodes)
12098

    
12099
    for inst in instance_data.values():
12100
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12101
        continue
12102

    
12103
      instance_nodes = InstanceNodes(inst)
12104

    
12105
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12106
        previously_split_instances.add(inst.name)
12107

    
12108
      if len(set(changed_nodes.get(node, node_data[node].group)
12109
                 for node in instance_nodes)) > 1:
12110
        all_split_instances.add(inst.name)
12111

    
12112
    return (list(all_split_instances - previously_split_instances),
12113
            list(previously_split_instances & all_split_instances))
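
# Illustrative sketch only, not part of the LU above: the same split check as
# CheckAssignmentForSplitInstances, reduced to plain dicts so the set logic is
# easy to follow. All node, group and instance names used here are
# hypothetical.
def _sketch_find_split_instances(changes, node_to_group, instance_to_nodes):
  """Return (newly split, still split) lists of instance names."""
  changed = dict((node, group) for node, group in changes
                 if node_to_group[node] != group)
  new_split = set()
  old_split = set()
  for name, nodes in instance_to_nodes.items():
    if len(set(node_to_group[n] for n in nodes)) > 1:
      old_split.add(name)
    if len(set(changed.get(n, node_to_group[n]) for n in nodes)) > 1:
      new_split.add(name)
  return (sorted(new_split - old_split), sorted(new_split & old_split))

# Example: moving node2 to group "g2" splits inst1 (primary on node1,
# secondary on node2), which is exactly the case that requires --force above:
#   _sketch_find_split_instances([("node2", "g2")],
#                                {"node1": "g1", "node2": "g1"},
#                                {"inst1": ["node1", "node2"]})
#   -> (["inst1"], [])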
12114

    
12115

    
12116
class _GroupQuery(_QueryBase):
12117
  FIELDS = query.GROUP_FIELDS
12118

    
12119
  def ExpandNames(self, lu):
12120
    lu.needed_locks = {}
12121

    
12122
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12123
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12124

    
12125
    if not self.names:
12126
      self.wanted = [name_to_uuid[name]
12127
                     for name in utils.NiceSort(name_to_uuid.keys())]
12128
    else:
12129
      # Names may be given either as group names or as UUIDs.
12130
      missing = []
12131
      self.wanted = []
12132
      all_uuid = frozenset(self._all_groups.keys())
12133

    
12134
      for name in self.names:
12135
        if name in all_uuid:
12136
          self.wanted.append(name)
12137
        elif name in name_to_uuid:
12138
          self.wanted.append(name_to_uuid[name])
12139
        else:
12140
          missing.append(name)
12141

    
12142
      if missing:
12143
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12144
                                   utils.CommaJoin(missing),
12145
                                   errors.ECODE_NOENT)
12146

    
12147
  def DeclareLocks(self, lu, level):
12148
    pass
12149

    
12150
  def _GetQueryData(self, lu):
12151
    """Computes the list of node groups and their attributes.
12152

12153
    """
12154
    do_nodes = query.GQ_NODE in self.requested_data
12155
    do_instances = query.GQ_INST in self.requested_data
12156

    
12157
    group_to_nodes = None
12158
    group_to_instances = None
12159

    
12160
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12161
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12162
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12163
    # instance->node. Hence, we will need to process nodes even if we only need
12164
    # instance information.
12165
    if do_nodes or do_instances:
12166
      all_nodes = lu.cfg.GetAllNodesInfo()
12167
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12168
      node_to_group = {}
12169

    
12170
      for node in all_nodes.values():
12171
        if node.group in group_to_nodes:
12172
          group_to_nodes[node.group].append(node.name)
12173
          node_to_group[node.name] = node.group
12174

    
12175
      if do_instances:
12176
        all_instances = lu.cfg.GetAllInstancesInfo()
12177
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12178

    
12179
        for instance in all_instances.values():
12180
          node = instance.primary_node
12181
          if node in node_to_group:
12182
            group_to_instances[node_to_group[node]].append(instance.name)
12183

    
12184
        if not do_nodes:
12185
          # Do not pass on node information if it was not requested.
12186
          group_to_nodes = None
12187

    
12188
    return query.GroupQueryData([self._all_groups[uuid]
12189
                                 for uuid in self.wanted],
12190
                                group_to_nodes, group_to_instances)
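
# Illustrative sketch only, not part of the query class above: how the
# group->nodes and group->instances maps are derived from plain dicts. It
# mirrors the comment above: instances reach their group only through their
# primary node, so nodes must be processed even when only instance data is
# requested. All names are hypothetical.
def _sketch_group_maps(wanted_groups, node_to_group, instance_to_pnode):
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  node_group = {}
  for node, group in node_to_group.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
      node_group[node] = group

  group_to_instances = dict((uuid, []) for uuid in wanted_groups)
  for inst, pnode in instance_to_pnode.items():
    if pnode in node_group:
      group_to_instances[node_group[pnode]].append(inst)

  return (group_to_nodes, group_to_instances)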
12191

    
12192

    
12193
class LUGroupQuery(NoHooksLU):
12194
  """Logical unit for querying node groups.
12195

12196
  """
12197
  REQ_BGL = False
12198

    
12199
  def CheckArguments(self):
12200
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12201
                          self.op.output_fields, False)
12202

    
12203
  def ExpandNames(self):
12204
    self.gq.ExpandNames(self)
12205

    
12206
  def DeclareLocks(self, level):
12207
    self.gq.DeclareLocks(self, level)
12208

    
12209
  def Exec(self, feedback_fn):
12210
    return self.gq.OldStyleQuery(self)
12211

    
12212

    
12213
class LUGroupSetParams(LogicalUnit):
12214
  """Modifies the parameters of a node group.
12215

12216
  """
12217
  HPATH = "group-modify"
12218
  HTYPE = constants.HTYPE_GROUP
12219
  REQ_BGL = False
12220

    
12221
  def CheckArguments(self):
12222
    all_changes = [
12223
      self.op.ndparams,
12224
      self.op.alloc_policy,
12225
      ]
12226

    
12227
    if all_changes.count(None) == len(all_changes):
12228
      raise errors.OpPrereqError("Please pass at least one modification",
12229
                                 errors.ECODE_INVAL)
12230

    
12231
  def ExpandNames(self):
12232
    # This raises errors.OpPrereqError on its own:
12233
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12234

    
12235
    self.needed_locks = {
12236
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12237
      }
12238

    
12239
  def CheckPrereq(self):
12240
    """Check prerequisites.
12241

12242
    """
12243
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12244

    
12245
    if self.group is None:
12246
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12247
                               (self.op.group_name, self.group_uuid))
12248

    
12249
    if self.op.ndparams:
12250
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12251
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12252
      self.new_ndparams = new_ndparams
12253

    
12254
  def BuildHooksEnv(self):
12255
    """Build hooks env.
12256

12257
    """
12258
    return {
12259
      "GROUP_NAME": self.op.group_name,
12260
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12261
      }
12262

    
12263
  def BuildHooksNodes(self):
12264
    """Build hooks nodes.
12265

12266
    """
12267
    mn = self.cfg.GetMasterNode()
12268
    return ([mn], [mn])
12269

    
12270
  def Exec(self, feedback_fn):
12271
    """Modifies the node group.
12272

12273
    """
12274
    result = []
12275

    
12276
    if self.op.ndparams:
12277
      self.group.ndparams = self.new_ndparams
12278
      result.append(("ndparams", str(self.group.ndparams)))
12279

    
12280
    if self.op.alloc_policy:
12281
      self.group.alloc_policy = self.op.alloc_policy
12282

    
12283
    self.cfg.Update(self.group, feedback_fn)
12284
    return result
12285

    
12286

    
12287
class LUGroupRemove(LogicalUnit):
12288
  HPATH = "group-remove"
12289
  HTYPE = constants.HTYPE_GROUP
12290
  REQ_BGL = False
12291

    
12292
  def ExpandNames(self):
12293
    # This raises errors.OpPrereqError on its own:
12294
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12295
    self.needed_locks = {
12296
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12297
      }
12298

    
12299
  def CheckPrereq(self):
12300
    """Check prerequisites.
12301

12302
    This checks that the given group name exists as a node group, that it is
12303
    empty (i.e., contains no nodes), and that it is not the last group of the
12304
    cluster.
12305

12306
    """
12307
    # Verify that the group is empty.
12308
    group_nodes = [node.name
12309
                   for node in self.cfg.GetAllNodesInfo().values()
12310
                   if node.group == self.group_uuid]
12311

    
12312
    if group_nodes:
12313
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12314
                                 " nodes: %s" %
12315
                                 (self.op.group_name,
12316
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12317
                                 errors.ECODE_STATE)
12318

    
12319
    # Verify the cluster would not be left group-less.
12320
    if len(self.cfg.GetNodeGroupList()) == 1:
12321
      raise errors.OpPrereqError("Group '%s' is the only group,"
12322
                                 " cannot be removed" %
12323
                                 self.op.group_name,
12324
                                 errors.ECODE_STATE)
12325

    
12326
  def BuildHooksEnv(self):
12327
    """Build hooks env.
12328

12329
    """
12330
    return {
12331
      "GROUP_NAME": self.op.group_name,
12332
      }
12333

    
12334
  def BuildHooksNodes(self):
12335
    """Build hooks nodes.
12336

12337
    """
12338
    mn = self.cfg.GetMasterNode()
12339
    return ([mn], [mn])
12340

    
12341
  def Exec(self, feedback_fn):
12342
    """Remove the node group.
12343

12344
    """
12345
    try:
12346
      self.cfg.RemoveNodeGroup(self.group_uuid)
12347
    except errors.ConfigurationError:
12348
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12349
                               (self.op.group_name, self.group_uuid))
12350

    
12351
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12352

    
12353

    
12354
class LUGroupRename(LogicalUnit):
12355
  HPATH = "group-rename"
12356
  HTYPE = constants.HTYPE_GROUP
12357
  REQ_BGL = False
12358

    
12359
  def ExpandNames(self):
12360
    # This raises errors.OpPrereqError on its own:
12361
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12362

    
12363
    self.needed_locks = {
12364
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12365
      }
12366

    
12367
  def CheckPrereq(self):
12368
    """Check prerequisites.
12369

12370
    Ensures requested new name is not yet used.
12371

12372
    """
12373
    try:
12374
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12375
    except errors.OpPrereqError:
12376
      pass
12377
    else:
12378
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12379
                                 " node group (UUID: %s)" %
12380
                                 (self.op.new_name, new_name_uuid),
12381
                                 errors.ECODE_EXISTS)
12382

    
12383
  def BuildHooksEnv(self):
12384
    """Build hooks env.
12385

12386
    """
12387
    return {
12388
      "OLD_NAME": self.op.group_name,
12389
      "NEW_NAME": self.op.new_name,
12390
      }
12391

    
12392
  def BuildHooksNodes(self):
12393
    """Build hooks nodes.
12394

12395
    """
12396
    mn = self.cfg.GetMasterNode()
12397

    
12398
    all_nodes = self.cfg.GetAllNodesInfo()
12399
    all_nodes.pop(mn, None)
12400

    
12401
    run_nodes = [mn]
12402
    run_nodes.extend(node.name for node in all_nodes.values()
12403
                     if node.group == self.group_uuid)
12404

    
12405
    return (run_nodes, run_nodes)
12406

    
12407
  def Exec(self, feedback_fn):
12408
    """Rename the node group.
12409

12410
    """
12411
    group = self.cfg.GetNodeGroup(self.group_uuid)
12412

    
12413
    if group is None:
12414
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12415
                               (self.op.group_name, self.group_uuid))
12416

    
12417
    group.name = self.op.new_name
12418
    self.cfg.Update(group, feedback_fn)
12419

    
12420
    return self.op.new_name
12421

    
12422

    
12423
class LUGroupEvacuate(LogicalUnit):
12424
  HPATH = "group-evacuate"
12425
  HTYPE = constants.HTYPE_GROUP
12426
  REQ_BGL = False
12427

    
12428
  def ExpandNames(self):
12429
    # This raises errors.OpPrereqError on its own:
12430
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12431

    
12432
    if self.op.target_groups:
12433
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12434
                                  self.op.target_groups)
12435
    else:
12436
      self.req_target_uuids = []
12437

    
12438
    if self.group_uuid in self.req_target_uuids:
12439
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12440
                                 " as a target group (targets are %s)" %
12441
                                 (self.group_uuid,
12442
                                  utils.CommaJoin(self.req_target_uuids)),
12443
                                 errors.ECODE_INVAL)
12444

    
12445
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12446

    
12447
    self.share_locks = _ShareAll()
12448
    self.needed_locks = {
12449
      locking.LEVEL_INSTANCE: [],
12450
      locking.LEVEL_NODEGROUP: [],
12451
      locking.LEVEL_NODE: [],
12452
      }
12453

    
12454
  def DeclareLocks(self, level):
12455
    if level == locking.LEVEL_INSTANCE:
12456
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12457

    
12458
      # Lock instances optimistically, needs verification once node and group
12459
      # locks have been acquired
12460
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12461
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12462

    
12463
    elif level == locking.LEVEL_NODEGROUP:
12464
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12465

    
12466
      if self.req_target_uuids:
12467
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12468

    
12469
        # Lock all groups used by instances optimistically; this requires going
12470
        # via the node before it's locked, requiring verification later on
12471
        lock_groups.update(group_uuid
12472
                           for instance_name in
12473
                             self.owned_locks(locking.LEVEL_INSTANCE)
12474
                           for group_uuid in
12475
                             self.cfg.GetInstanceNodeGroups(instance_name))
12476
      else:
12477
        # No target groups, need to lock all of them
12478
        lock_groups = locking.ALL_SET
12479

    
12480
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12481

    
12482
    elif level == locking.LEVEL_NODE:
12483
      # This will only lock the nodes in the group to be evacuated which
12484
      # contain actual instances
12485
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12486
      self._LockInstancesNodes()
12487

    
12488
      # Lock all nodes in group to be evacuated and target groups
12489
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12490
      assert self.group_uuid in owned_groups
12491
      member_nodes = [node_name
12492
                      for group in owned_groups
12493
                      for node_name in self.cfg.GetNodeGroup(group).members]
12494
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12495

    
12496
  def CheckPrereq(self):
12497
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12498
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12499
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12500

    
12501
    assert owned_groups.issuperset(self.req_target_uuids)
12502
    assert self.group_uuid in owned_groups
12503

    
12504
    # Check if locked instances are still correct
12505
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12506

    
12507
    # Get instance information
12508
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12509

    
12510
    # Check if node groups for locked instances are still correct
12511
    for instance_name in owned_instances:
12512
      inst = self.instances[instance_name]
12513
      assert owned_nodes.issuperset(inst.all_nodes), \
12514
        "Instance %s's nodes changed while we kept the lock" % instance_name
12515

    
12516
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12517
                                             owned_groups)
12518

    
12519
      assert self.group_uuid in inst_groups, \
12520
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12521

    
12522
    if self.req_target_uuids:
12523
      # User requested specific target groups
12524
      self.target_uuids = self.req_target_uuids
12525
    else:
12526
      # All groups except the one to be evacuated are potential targets
12527
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12528
                           if group_uuid != self.group_uuid]
12529

    
12530
      if not self.target_uuids:
12531
        raise errors.OpPrereqError("There are no possible target groups",
12532
                                   errors.ECODE_INVAL)
12533

    
12534
  def BuildHooksEnv(self):
12535
    """Build hooks env.
12536

12537
    """
12538
    return {
12539
      "GROUP_NAME": self.op.group_name,
12540
      "TARGET_GROUPS": " ".join(self.target_uuids),
12541
      }
12542

    
12543
  def BuildHooksNodes(self):
12544
    """Build hooks nodes.
12545

12546
    """
12547
    mn = self.cfg.GetMasterNode()
12548

    
12549
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12550

    
12551
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12552

    
12553
    return (run_nodes, run_nodes)
12554

    
12555
  def Exec(self, feedback_fn):
12556
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12557

    
12558
    assert self.group_uuid not in self.target_uuids
12559

    
12560
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12561
                     instances=instances, target_groups=self.target_uuids)
12562

    
12563
    ial.Run(self.op.iallocator)
12564

    
12565
    if not ial.success:
12566
      raise errors.OpPrereqError("Can't compute group evacuation using"
12567
                                 " iallocator '%s': %s" %
12568
                                 (self.op.iallocator, ial.info),
12569
                                 errors.ECODE_NORES)
12570

    
12571
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12572

    
12573
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12574
                 len(jobs), self.op.group_name)
12575

    
12576
    return ResultWithJobs(jobs)
12577

    
12578

    
12579
class TagsLU(NoHooksLU): # pylint: disable=W0223
12580
  """Generic tags LU.
12581

12582
  This is an abstract class which is the parent of all the other tags LUs.
12583

12584
  """
12585
  def ExpandNames(self):
12586
    self.group_uuid = None
12587
    self.needed_locks = {}
12588
    if self.op.kind == constants.TAG_NODE:
12589
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12590
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12591
    elif self.op.kind == constants.TAG_INSTANCE:
12592
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12593
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12594
    elif self.op.kind == constants.TAG_NODEGROUP:
12595
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12596

    
12597
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12598
    # not possible to acquire the BGL based on opcode parameters)
12599

    
12600
  def CheckPrereq(self):
12601
    """Check prerequisites.
12602

12603
    """
12604
    if self.op.kind == constants.TAG_CLUSTER:
12605
      self.target = self.cfg.GetClusterInfo()
12606
    elif self.op.kind == constants.TAG_NODE:
12607
      self.target = self.cfg.GetNodeInfo(self.op.name)
12608
    elif self.op.kind == constants.TAG_INSTANCE:
12609
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12610
    elif self.op.kind == constants.TAG_NODEGROUP:
12611
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12612
    else:
12613
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12614
                                 str(self.op.kind), errors.ECODE_INVAL)
12615

    
12616

    
12617
class LUTagsGet(TagsLU):
12618
  """Returns the tags of a given object.
12619

12620
  """
12621
  REQ_BGL = False
12622

    
12623
  def ExpandNames(self):
12624
    TagsLU.ExpandNames(self)
12625

    
12626
    # Share locks as this is only a read operation
12627
    self.share_locks = _ShareAll()
12628

    
12629
  def Exec(self, feedback_fn):
12630
    """Returns the tag list.
12631

12632
    """
12633
    return list(self.target.GetTags())
12634

    
12635

    
12636
class LUTagsSearch(NoHooksLU):
12637
  """Searches the tags for a given pattern.
12638

12639
  """
12640
  REQ_BGL = False
12641

    
12642
  def ExpandNames(self):
12643
    self.needed_locks = {}
12644

    
12645
  def CheckPrereq(self):
12646
    """Check prerequisites.
12647

12648
    This checks the pattern passed for validity by compiling it.
12649

12650
    """
12651
    try:
12652
      self.re = re.compile(self.op.pattern)
12653
    except re.error, err:
12654
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12655
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12656

    
12657
  def Exec(self, feedback_fn):
12658
    """Returns the tag list.
12659

12660
    """
12661
    cfg = self.cfg
12662
    tgts = [("/cluster", cfg.GetClusterInfo())]
12663
    ilist = cfg.GetAllInstancesInfo().values()
12664
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12665
    nlist = cfg.GetAllNodesInfo().values()
12666
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12667
    tgts.extend(("/nodegroup/%s" % n.name, n)
12668
                for n in cfg.GetAllNodeGroupsInfo().values())
12669
    results = []
12670
    for path, target in tgts:
12671
      for tag in target.GetTags():
12672
        if self.re.search(tag):
12673
          results.append((path, tag))
12674
    return results
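
# Illustrative sketch only, not part of the LU above: the same tag search over
# a plain {path: [tags]} mapping ("re" is already imported at module level).
# The sample paths and tags in the comment are hypothetical.
def _sketch_search_tags(pattern, tags_by_path):
  regex = re.compile(pattern)
  return [(path, tag)
          for path, tags in sorted(tags_by_path.items())
          for tag in tags
          if regex.search(tag)]

# _sketch_search_tags(r"^env:", {"/cluster": ["env:prod"],
#                                "/instances/web1": ["env:prod", "www"]})
# -> [("/cluster", "env:prod"), ("/instances/web1", "env:prod")]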
12675

    
12676

    
12677
class LUTagsSet(TagsLU):
12678
  """Sets a tag on a given object.
12679

12680
  """
12681
  REQ_BGL = False
12682

    
12683
  def CheckPrereq(self):
12684
    """Check prerequisites.
12685

12686
    This checks the type and length of the tag name and value.
12687

12688
    """
12689
    TagsLU.CheckPrereq(self)
12690
    for tag in self.op.tags:
12691
      objects.TaggableObject.ValidateTag(tag)
12692

    
12693
  def Exec(self, feedback_fn):
12694
    """Sets the tag.
12695

12696
    """
12697
    try:
12698
      for tag in self.op.tags:
12699
        self.target.AddTag(tag)
12700
    except errors.TagError, err:
12701
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12702
    self.cfg.Update(self.target, feedback_fn)
12703

    
12704

    
12705
class LUTagsDel(TagsLU):
12706
  """Delete a list of tags from a given object.
12707

12708
  """
12709
  REQ_BGL = False
12710

    
12711
  def CheckPrereq(self):
12712
    """Check prerequisites.
12713

12714
    This checks that we have the given tag.
12715

12716
    """
12717
    TagsLU.CheckPrereq(self)
12718
    for tag in self.op.tags:
12719
      objects.TaggableObject.ValidateTag(tag)
12720
    del_tags = frozenset(self.op.tags)
12721
    cur_tags = self.target.GetTags()
12722

    
12723
    diff_tags = del_tags - cur_tags
12724
    if diff_tags:
12725
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12726
      raise errors.OpPrereqError("Tag(s) %s not found" %
12727
                                 (utils.CommaJoin(diff_names), ),
12728
                                 errors.ECODE_NOENT)
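
    # Illustrative example only (tag names are hypothetical): with current
    # tags {"a", "b"} and a request to delete ["b", "c"], diff_tags is {"c"}
    # and the check above fails with "Tag(s) 'c' not found" instead of
    # silently ignoring the unknown tag.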
12729

    
12730
  def Exec(self, feedback_fn):
12731
    """Remove the tag from the object.
12732

12733
    """
12734
    for tag in self.op.tags:
12735
      self.target.RemoveTag(tag)
12736
    self.cfg.Update(self.target, feedback_fn)
12737

    
12738

    
12739
class LUTestDelay(NoHooksLU):
12740
  """Sleep for a specified amount of time.
12741

12742
  This LU sleeps on the master and/or nodes for a specified amount of
12743
  time.
12744

12745
  """
12746
  REQ_BGL = False
12747

    
12748
  def ExpandNames(self):
12749
    """Expand names and set required locks.
12750

12751
    This expands the node list, if any.
12752

12753
    """
12754
    self.needed_locks = {}
12755
    if self.op.on_nodes:
12756
      # _GetWantedNodes can be used here, but is not always appropriate to use
12757
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12758
      # more information.
12759
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12760
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12761

    
12762
  def _TestDelay(self):
12763
    """Do the actual sleep.
12764

12765
    """
12766
    if self.op.on_master:
12767
      if not utils.TestDelay(self.op.duration):
12768
        raise errors.OpExecError("Error during master delay test")
12769
    if self.op.on_nodes:
12770
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12771
      for node, node_result in result.items():
12772
        node_result.Raise("Failure during rpc call to node %s" % node)
12773

    
12774
  def Exec(self, feedback_fn):
12775
    """Execute the test delay opcode, with the wanted repetitions.
12776

12777
    """
12778
    if self.op.repeat == 0:
12779
      self._TestDelay()
12780
    else:
12781
      top_value = self.op.repeat - 1
12782
      for i in range(self.op.repeat):
12783
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12784
        self._TestDelay()
12785

    
12786

    
12787
class LUTestJqueue(NoHooksLU):
12788
  """Utility LU to test some aspects of the job queue.
12789

12790
  """
12791
  REQ_BGL = False
12792

    
12793
  # Must be lower than default timeout for WaitForJobChange to see whether it
12794
  # notices changed jobs
12795
  _CLIENT_CONNECT_TIMEOUT = 20.0
12796
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12797

    
12798
  @classmethod
12799
  def _NotifyUsingSocket(cls, cb, errcls):
12800
    """Opens a Unix socket and waits for another program to connect.
12801

12802
    @type cb: callable
12803
    @param cb: Callback to send socket name to client
12804
    @type errcls: class
12805
    @param errcls: Exception class to use for errors
12806

12807
    """
12808
    # Using a temporary directory as there's no easy way to create temporary
12809
    # sockets without writing a custom loop around tempfile.mktemp and
12810
    # socket.bind
12811
    tmpdir = tempfile.mkdtemp()
12812
    try:
12813
      tmpsock = utils.PathJoin(tmpdir, "sock")
12814

    
12815
      logging.debug("Creating temporary socket at %s", tmpsock)
12816
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12817
      try:
12818
        sock.bind(tmpsock)
12819
        sock.listen(1)
12820

    
12821
        # Send details to client
12822
        cb(tmpsock)
12823

    
12824
        # Wait for client to connect before continuing
12825
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12826
        try:
12827
          (conn, _) = sock.accept()
12828
        except socket.error, err:
12829
          raise errcls("Client didn't connect in time (%s)" % err)
12830
      finally:
12831
        sock.close()
12832
    finally:
12833
      # Remove as soon as client is connected
12834
      shutil.rmtree(tmpdir)
12835

    
12836
    # Wait for client to close
12837
    try:
12838
      try:
12839
        # pylint: disable=E1101
12840
        # Instance of '_socketobject' has no ... member
12841
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12842
        conn.recv(1)
12843
      except socket.error, err:
12844
        raise errcls("Client failed to confirm notification (%s)" % err)
12845
    finally:
12846
      conn.close()
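
# Illustrative sketch only, not part of the LU above: the core of the
# temporary-directory Unix socket pattern used by _NotifyUsingSocket. The
# imports are repeated so the sketch stands on its own; the timeout and the
# socket file name are arbitrary.
import os
import shutil
import socket
import tempfile

def _sketch_wait_for_client(notify_fn, connect_timeout=20.0):
  """Bind a Unix socket in a temporary directory and wait for one client."""
  tmpdir = tempfile.mkdtemp()
  try:
    path = os.path.join(tmpdir, "sock")
    sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
      sock.bind(path)
      sock.listen(1)
      notify_fn(path)            # tell the client where to connect
      sock.settimeout(connect_timeout)
      (conn, _) = sock.accept()  # raises socket.timeout if nobody connects
      return conn
    finally:
      sock.close()
  finally:
    # Once the client is connected (or accept failed), the socket file and
    # its directory are no longer needed.
    shutil.rmtree(tmpdir)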
12847

    
12848
  def _SendNotification(self, test, arg, sockname):
12849
    """Sends a notification to the client.
12850

12851
    @type test: string
12852
    @param test: Test name
12853
    @param arg: Test argument (depends on test)
12854
    @type sockname: string
12855
    @param sockname: Socket path
12856

12857
    """
12858
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12859

    
12860
  def _Notify(self, prereq, test, arg):
12861
    """Notifies the client of a test.
12862

12863
    @type prereq: bool
12864
    @param prereq: Whether this is a prereq-phase test
12865
    @type test: string
12866
    @param test: Test name
12867
    @param arg: Test argument (depends on test)
12868

12869
    """
12870
    if prereq:
12871
      errcls = errors.OpPrereqError
12872
    else:
12873
      errcls = errors.OpExecError
12874

    
12875
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12876
                                                  test, arg),
12877
                                   errcls)
12878

    
12879
  def CheckArguments(self):
12880
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12881
    self.expandnames_calls = 0
12882

    
12883
  def ExpandNames(self):
12884
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12885
    if checkargs_calls < 1:
12886
      raise errors.ProgrammerError("CheckArguments was not called")
12887

    
12888
    self.expandnames_calls += 1
12889

    
12890
    if self.op.notify_waitlock:
12891
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12892

    
12893
    self.LogInfo("Expanding names")
12894

    
12895
    # Get lock on master node (just to get a lock, not for a particular reason)
12896
    self.needed_locks = {
12897
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12898
      }
12899

    
12900
  def Exec(self, feedback_fn):
12901
    if self.expandnames_calls < 1:
12902
      raise errors.ProgrammerError("ExpandNames was not called")
12903

    
12904
    if self.op.notify_exec:
12905
      self._Notify(False, constants.JQT_EXEC, None)
12906

    
12907
    self.LogInfo("Executing")
12908

    
12909
    if self.op.log_messages:
12910
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12911
      for idx, msg in enumerate(self.op.log_messages):
12912
        self.LogInfo("Sending log message %s", idx + 1)
12913
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12914
        # Report how many test messages have been sent
12915
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12916

    
12917
    if self.op.fail:
12918
      raise errors.OpExecError("Opcode failure was requested")
12919

    
12920
    return True
12921

    
12922

    
12923
class IAllocator(object):
12924
  """IAllocator framework.
12925

12926
  An IAllocator instance has four sets of attributes:
12927
    - cfg that is needed to query the cluster
12928
    - input data (all members of the _KEYS class attribute are required)
12929
    - four buffer attributes (in|out_data|text), that represent the
12930
      input (to the external script) in text and data structure format,
12931
      and the output from it, again in two formats
12932
    - the result variables from the script (success, info, nodes) for
12933
      easy usage
12934

12935
  """
12936
  # pylint: disable=R0902
12937
  # lots of instance attributes
12938

    
12939
  def __init__(self, cfg, rpc, mode, **kwargs):
12940
    self.cfg = cfg
12941
    self.rpc = rpc
12942
    # init buffer variables
12943
    self.in_text = self.out_text = self.in_data = self.out_data = None
12944
    # init all input fields so that pylint is happy
12945
    self.mode = mode
12946
    self.memory = self.disks = self.disk_template = None
12947
    self.os = self.tags = self.nics = self.vcpus = None
12948
    self.hypervisor = None
12949
    self.relocate_from = None
12950
    self.name = None
12951
    self.instances = None
12952
    self.evac_mode = None
12953
    self.target_groups = []
12954
    # computed fields
12955
    self.required_nodes = None
12956
    # init result fields
12957
    self.success = self.info = self.result = None
12958

    
12959
    try:
12960
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12961
    except KeyError:
12962
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12963
                                   " IAllocator" % self.mode)
12964

    
12965
    keyset = [n for (n, _) in keydata]
12966

    
12967
    for key in kwargs:
12968
      if key not in keyset:
12969
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12970
                                     " IAllocator" % key)
12971
      setattr(self, key, kwargs[key])
12972

    
12973
    for key in keyset:
12974
      if key not in kwargs:
12975
        raise errors.ProgrammerError("Missing input parameter '%s' to"
12976
                                     " IAllocator" % key)
12977
    self._BuildInputData(compat.partial(fn, self), keydata)
12978

    
12979
  def _ComputeClusterData(self):
12980
    """Compute the generic allocator input data.
12981

12982
    This is the data that is independent of the actual operation.
12983

12984
    """
12985
    cfg = self.cfg
12986
    cluster_info = cfg.GetClusterInfo()
12987
    # cluster data
12988
    data = {
12989
      "version": constants.IALLOCATOR_VERSION,
12990
      "cluster_name": cfg.GetClusterName(),
12991
      "cluster_tags": list(cluster_info.GetTags()),
12992
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12993
      # we don't have job IDs
12994
      }
12995
    ninfo = cfg.GetAllNodesInfo()
12996
    iinfo = cfg.GetAllInstancesInfo().values()
12997
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12998

    
12999
    # node data
13000
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13001

    
13002
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13003
      hypervisor_name = self.hypervisor
13004
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13005
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13006
    else:
13007
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13008

    
13009
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13010
                                        hypervisor_name)
13011
    node_iinfo = \
13012
      self.rpc.call_all_instances_info(node_list,
13013
                                       cluster_info.enabled_hypervisors)
13014

    
13015
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13016

    
13017
    config_ndata = self._ComputeBasicNodeData(ninfo)
13018
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13019
                                                 i_list, config_ndata)
13020
    assert len(data["nodes"]) == len(ninfo), \
13021
        "Incomplete node data computed"
13022

    
13023
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13024

    
13025
    self.in_data = data
13026

    
13027
  @staticmethod
13028
  def _ComputeNodeGroupData(cfg):
13029
    """Compute node groups data.
13030

13031
    """
13032
    ng = dict((guuid, {
13033
      "name": gdata.name,
13034
      "alloc_policy": gdata.alloc_policy,
13035
      })
13036
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13037

    
13038
    return ng
13039

    
13040
  @staticmethod
13041
  def _ComputeBasicNodeData(node_cfg):
13042
    """Compute global node data.
13043

13044
    @rtype: dict
13045
    @returns: a dict of name: (node dict, node config)
13046

13047
    """
13048
    # fill in static (config-based) values
13049
    node_results = dict((ninfo.name, {
13050
      "tags": list(ninfo.GetTags()),
13051
      "primary_ip": ninfo.primary_ip,
13052
      "secondary_ip": ninfo.secondary_ip,
13053
      "offline": ninfo.offline,
13054
      "drained": ninfo.drained,
13055
      "master_candidate": ninfo.master_candidate,
13056
      "group": ninfo.group,
13057
      "master_capable": ninfo.master_capable,
13058
      "vm_capable": ninfo.vm_capable,
13059
      })
13060
      for ninfo in node_cfg.values())
13061

    
13062
    return node_results
13063

    
13064
  @staticmethod
13065
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13066
                              node_results):
13067
    """Compute global node data.
13068

13069
    @param node_results: the basic node structures as filled from the config
13070

13071
    """
13072
    # make a copy of the current dict
13073
    node_results = dict(node_results)
13074
    for nname, nresult in node_data.items():
13075
      assert nname in node_results, "Missing basic data for node %s" % nname
13076
      ninfo = node_cfg[nname]
13077

    
13078
      if not (ninfo.offline or ninfo.drained):
13079
        nresult.Raise("Can't get data for node %s" % nname)
13080
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13081
                                nname)
13082
        remote_info = nresult.payload
13083

    
13084
        for attr in ["memory_total", "memory_free", "memory_dom0",
13085
                     "vg_size", "vg_free", "cpu_total"]:
13086
          if attr not in remote_info:
13087
            raise errors.OpExecError("Node '%s' didn't return attribute"
13088
                                     " '%s'" % (nname, attr))
13089
          if not isinstance(remote_info[attr], int):
13090
            raise errors.OpExecError("Node '%s' returned invalid value"
13091
                                     " for '%s': %s" %
13092
                                     (nname, attr, remote_info[attr]))
13093
        # compute memory used by primary instances
13094
        i_p_mem = i_p_up_mem = 0
13095
        for iinfo, beinfo in i_list:
13096
          if iinfo.primary_node == nname:
13097
            i_p_mem += beinfo[constants.BE_MEMORY]
13098
            if iinfo.name not in node_iinfo[nname].payload:
13099
              i_used_mem = 0
13100
            else:
13101
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13102
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13103
            remote_info["memory_free"] -= max(0, i_mem_diff)
13104

    
13105
            if iinfo.admin_up:
13106
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13107

    
13108
        # build the per-node dynamic data
13109
        pnr_dyn = {
13110
          "total_memory": remote_info["memory_total"],
13111
          "reserved_memory": remote_info["memory_dom0"],
13112
          "free_memory": remote_info["memory_free"],
13113
          "total_disk": remote_info["vg_size"],
13114
          "free_disk": remote_info["vg_free"],
13115
          "total_cpus": remote_info["cpu_total"],
13116
          "i_pri_memory": i_p_mem,
13117
          "i_pri_up_memory": i_p_up_mem,
13118
          }
13119
        pnr_dyn.update(node_results[nname])
13120
        node_results[nname] = pnr_dyn
13121

    
13122
    return node_results
13123

    
13124
  @staticmethod
13125
  def _ComputeInstanceData(cluster_info, i_list):
13126
    """Compute global instance data.
13127

13128
    """
13129
    instance_data = {}
13130
    for iinfo, beinfo in i_list:
13131
      nic_data = []
13132
      for nic in iinfo.nics:
13133
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13134
        nic_dict = {
13135
          "mac": nic.mac,
13136
          "ip": nic.ip,
13137
          "mode": filled_params[constants.NIC_MODE],
13138
          "link": filled_params[constants.NIC_LINK],
13139
          }
13140
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13141
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13142
        nic_data.append(nic_dict)
13143
      pir = {
13144
        "tags": list(iinfo.GetTags()),
13145
        "admin_up": iinfo.admin_up,
13146
        "vcpus": beinfo[constants.BE_VCPUS],
13147
        "memory": beinfo[constants.BE_MEMORY],
13148
        "os": iinfo.os,
13149
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13150
        "nics": nic_data,
13151
        "disks": [{constants.IDISK_SIZE: dsk.size,
13152
                   constants.IDISK_MODE: dsk.mode}
13153
                  for dsk in iinfo.disks],
13154
        "disk_template": iinfo.disk_template,
13155
        "hypervisor": iinfo.hypervisor,
13156
        }
13157
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13158
                                                 pir["disks"])
13159
      instance_data[iinfo.name] = pir
13160

    
13161
    return instance_data
13162

    
13163
  def _AddNewInstance(self):
13164
    """Add new instance data to allocator structure.
13165

13166
    This in combination with _ComputeClusterData will create the
13167
    correct structure needed as input for the allocator.
13168

13169
    The checks for the completeness of the opcode must have already been
13170
    done.
13171

13172
    """
13173
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13174

    
13175
    if self.disk_template in constants.DTS_INT_MIRROR:
13176
      self.required_nodes = 2
13177
    else:
13178
      self.required_nodes = 1
13179

    
13180
    request = {
13181
      "name": self.name,
13182
      "disk_template": self.disk_template,
13183
      "tags": self.tags,
13184
      "os": self.os,
13185
      "vcpus": self.vcpus,
13186
      "memory": self.memory,
13187
      "disks": self.disks,
13188
      "disk_space_total": disk_space,
13189
      "nics": self.nics,
13190
      "required_nodes": self.required_nodes,
13191
      "hypervisor": self.hypervisor,
13192
      }
13193

    
13194
    return request
13195

    
13196
  def _AddRelocateInstance(self):
13197
    """Add relocate instance data to allocator structure.
13198

13199
    This in combination with _ComputeClusterData will create the
13200
    correct structure needed as input for the allocator.
13201

13202
    The checks for the completeness of the opcode must have already been
13203
    done.
13204

13205
    """
13206
    instance = self.cfg.GetInstanceInfo(self.name)
13207
    if instance is None:
13208
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13209
                                   " IAllocator" % self.name)
13210

    
13211
    if instance.disk_template not in constants.DTS_MIRRORED:
13212
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13213
                                 errors.ECODE_INVAL)
13214

    
13215
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13216
        len(instance.secondary_nodes) != 1:
13217
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13218
                                 errors.ECODE_STATE)
13219

    
13220
    self.required_nodes = 1
13221
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13222
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13223

    
13224
    request = {
13225
      "name": self.name,
13226
      "disk_space_total": disk_space,
13227
      "required_nodes": self.required_nodes,
13228
      "relocate_from": self.relocate_from,
13229
      }
13230
    return request
13231

    
13232
  def _AddNodeEvacuate(self):
13233
    """Get data for node-evacuate requests.
13234

13235
    """
13236
    return {
13237
      "instances": self.instances,
13238
      "evac_mode": self.evac_mode,
13239
      }
13240

    
13241
  def _AddChangeGroup(self):
13242
    """Get data for node-evacuate requests.
13243

13244
    """
13245
    return {
13246
      "instances": self.instances,
13247
      "target_groups": self.target_groups,
13248
      }
13249

    
13250
  def _BuildInputData(self, fn, keydata):
13251
    """Build input data structures.
13252

13253
    """
13254
    self._ComputeClusterData()
13255

    
13256
    request = fn()
13257
    request["type"] = self.mode
13258
    for keyname, keytype in keydata:
13259
      if keyname not in request:
13260
        raise errors.ProgrammerError("Request parameter %s is missing" %
13261
                                     keyname)
13262
      val = request[keyname]
13263
      if not keytype(val):
13264
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13265
                                     " validation, value %s, expected"
13266
                                     " type %s" % (keyname, val, keytype))
13267
    self.in_data["request"] = request
13268

    
13269
    self.in_text = serializer.Dump(self.in_data)
13270

    
13271
  _STRING_LIST = ht.TListOf(ht.TString)
13272
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13273
     # pylint: disable=E1101
13274
     # Class '...' has no 'OP_ID' member
13275
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13276
                          opcodes.OpInstanceMigrate.OP_ID,
13277
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13278
     })))
13279

    
13280
  _NEVAC_MOVED = \
13281
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13282
                       ht.TItems([ht.TNonEmptyString,
13283
                                  ht.TNonEmptyString,
13284
                                  ht.TListOf(ht.TNonEmptyString),
13285
                                 ])))
13286
  _NEVAC_FAILED = \
13287
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13288
                       ht.TItems([ht.TNonEmptyString,
13289
                                  ht.TMaybeString,
13290
                                 ])))
13291
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13292
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
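
  # Illustrative example only (all names hypothetical) of a value accepted by
  # _NEVAC_RESULT: a three-element list of moved entries, failed entries and
  # follow-up jobs, e.g.
  #   [[["inst1", "target", ["node3", "node4"]]],   # moved
  #    [["inst2", "not enough memory on target"]],  # failed
  #    []]                                          # jobs (see _JOB_LIST)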
13293

    
13294
  _MODE_DATA = {
13295
    constants.IALLOCATOR_MODE_ALLOC:
13296
      (_AddNewInstance,
13297
       [
13298
        ("name", ht.TString),
13299
        ("memory", ht.TInt),
13300
        ("disks", ht.TListOf(ht.TDict)),
13301
        ("disk_template", ht.TString),
13302
        ("os", ht.TString),
13303
        ("tags", _STRING_LIST),
13304
        ("nics", ht.TListOf(ht.TDict)),
13305
        ("vcpus", ht.TInt),
13306
        ("hypervisor", ht.TString),
13307
        ], ht.TList),
13308
    constants.IALLOCATOR_MODE_RELOC:
13309
      (_AddRelocateInstance,
13310
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13311
       ht.TList),
13312
     constants.IALLOCATOR_MODE_NODE_EVAC:
13313
      (_AddNodeEvacuate, [
13314
        ("instances", _STRING_LIST),
13315
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13316
        ], _NEVAC_RESULT),
13317
     constants.IALLOCATOR_MODE_CHG_GROUP:
13318
      (_AddChangeGroup, [
13319
        ("instances", _STRING_LIST),
13320
        ("target_groups", _STRING_LIST),
13321
        ], _NEVAC_RESULT),
13322
    }
13323

    
13324
  def Run(self, name, validate=True, call_fn=None):
13325
    """Run an instance allocator and return the results.
13326

13327
    """
13328
    if call_fn is None:
13329
      call_fn = self.rpc.call_iallocator_runner
13330

    
13331
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13332
    result.Raise("Failure while running the iallocator script")
13333

    
13334
    self.out_text = result.payload
13335
    if validate:
13336
      self._ValidateResult()
13337

    
13338
  def _ValidateResult(self):
13339
    """Process the allocator results.
13340

13341
    This will process and, if successful, save the result in
13342
    self.out_data and the other parameters.
13343

13344
    """
13345
    try:
13346
      rdict = serializer.Load(self.out_text)
13347
    except Exception, err:
13348
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13349

    
13350
    if not isinstance(rdict, dict):
13351
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13352

    
13353
    # TODO: remove backwards compatibility in later versions
13354
    if "nodes" in rdict and "result" not in rdict:
13355
      rdict["result"] = rdict["nodes"]
13356
      del rdict["nodes"]
13357

    
13358
    for key in "success", "info", "result":
13359
      if key not in rdict:
13360
        raise errors.OpExecError("Can't parse iallocator results:"
13361
                                 " missing key '%s'" % key)
13362
      setattr(self, key, rdict[key])
13363

    
13364
    if not self._result_check(self.result):
13365
      raise errors.OpExecError("Iallocator returned invalid result,"
13366
                               " expected %s, got %s" %
13367
                               (self._result_check, self.result),
13368
                               errors.ECODE_INVAL)
13369

    
13370
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13371
      assert self.relocate_from is not None
13372
      assert self.required_nodes == 1
13373

    
13374
      node2group = dict((name, ndata["group"])
13375
                        for (name, ndata) in self.in_data["nodes"].items())
13376

    
13377
      fn = compat.partial(self._NodesToGroups, node2group,
13378
                          self.in_data["nodegroups"])
13379

    
13380
      instance = self.cfg.GetInstanceInfo(self.name)
13381
      request_groups = fn(self.relocate_from + [instance.primary_node])
13382
      result_groups = fn(rdict["result"] + [instance.primary_node])
13383

    
13384
      if self.success and not set(result_groups).issubset(request_groups):
13385
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13386
                                 " differ from original groups (%s)" %
13387
                                 (utils.CommaJoin(result_groups),
13388
                                  utils.CommaJoin(request_groups)))
13389

    
13390
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13391
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13392

    
13393
    self.out_data = rdict
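
    # Illustrative example only (node names hypothetical): for an allocation
    # request the deserialized reply must carry at least the three keys
    # checked above, e.g.:
    #   {"success": True, "info": "allocation successful",
    #    "result": ["node1.example.com", "node2.example.com"]}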
13394

    
13395
  @staticmethod
13396
  def _NodesToGroups(node2group, groups, nodes):
13397
    """Returns a list of unique group names for a list of nodes.
13398

13399
    @type node2group: dict
13400
    @param node2group: Map from node name to group UUID
13401
    @type groups: dict
13402
    @param groups: Group information
13403
    @type nodes: list
13404
    @param nodes: Node names
13405

13406
    """
13407
    result = set()
13408

    
13409
    for node in nodes:
13410
      try:
13411
        group_uuid = node2group[node]
13412
      except KeyError:
13413
        # Ignore unknown node
13414
        pass
13415
      else:
13416
        try:
13417
          group = groups[group_uuid]
13418
        except KeyError:
13419
          # Can't find group, let's use UUID
13420
          group_name = group_uuid
13421
        else:
13422
          group_name = group["name"]
13423

    
13424
        result.add(group_name)
13425

    
13426
    return sorted(result)
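
    # Illustrative example only (all names hypothetical): unknown nodes are
    # skipped and group UUIDs without a known name fall back to the UUID:
    #   IAllocator._NodesToGroups({"node1": "uuid-a", "node2": "uuid-b"},
    #                             {"uuid-a": {"name": "default"}},
    #                             ["node1", "node2", "node9"])
    #   -> ["default", "uuid-b"]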
13427

    
13428

    
13429
class LUTestAllocator(NoHooksLU):
13430
  """Run allocator tests.
13431

13432
  This LU runs the allocator tests
13433

13434
  """
13435
  def CheckPrereq(self):
13436
    """Check prerequisites.
13437

13438
    This checks the opcode parameters depending on the test's direction and mode.
13439

13440
    """
13441
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13442
      for attr in ["memory", "disks", "disk_template",
13443
                   "os", "tags", "nics", "vcpus"]:
13444
        if not hasattr(self.op, attr):
13445
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13446
                                     attr, errors.ECODE_INVAL)
13447
      iname = self.cfg.ExpandInstanceName(self.op.name)
13448
      if iname is not None:
13449
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13450
                                   iname, errors.ECODE_EXISTS)
13451
      if not isinstance(self.op.nics, list):
13452
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13453
                                   errors.ECODE_INVAL)
13454
      if not isinstance(self.op.disks, list):
13455
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13456
                                   errors.ECODE_INVAL)
13457
      for row in self.op.disks:
13458
        if (not isinstance(row, dict) or
13459
            constants.IDISK_SIZE not in row or
13460
            not isinstance(row[constants.IDISK_SIZE], int) or
13461
            constants.IDISK_MODE not in row or
13462
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13463
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13464
                                     " parameter", errors.ECODE_INVAL)
13465
      if self.op.hypervisor is None:
13466
        self.op.hypervisor = self.cfg.GetHypervisorType()
13467
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13468
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13469
      self.op.name = fname
13470
      self.relocate_from = \
13471
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13472
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13473
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13474
      if not self.op.instances:
13475
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13476
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13477
    else:
13478
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13479
                                 self.op.mode, errors.ECODE_INVAL)
13480

    
13481
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13482
      if self.op.allocator is None:
13483
        raise errors.OpPrereqError("Missing allocator name",
13484
                                   errors.ECODE_INVAL)
13485
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13486
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13487
                                 self.op.direction, errors.ECODE_INVAL)
13488

    
13489
  def Exec(self, feedback_fn):
13490
    """Run the allocator test.
13491

13492
    """
13493
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13494
      ial = IAllocator(self.cfg, self.rpc,
13495
                       mode=self.op.mode,
13496
                       name=self.op.name,
13497
                       memory=self.op.memory,
13498
                       disks=self.op.disks,
13499
                       disk_template=self.op.disk_template,
13500
                       os=self.op.os,
13501
                       tags=self.op.tags,
13502
                       nics=self.op.nics,
13503
                       vcpus=self.op.vcpus,
13504
                       hypervisor=self.op.hypervisor,
13505
                       )
13506
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13507
      ial = IAllocator(self.cfg, self.rpc,
13508
                       mode=self.op.mode,
13509
                       name=self.op.name,
13510
                       relocate_from=list(self.relocate_from),
13511
                       )
13512
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13513
      ial = IAllocator(self.cfg, self.rpc,
13514
                       mode=self.op.mode,
13515
                       instances=self.op.instances,
13516
                       target_groups=self.op.target_groups)
13517
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13518
      ial = IAllocator(self.cfg, self.rpc,
13519
                       mode=self.op.mode,
13520
                       instances=self.op.instances,
13521
                       evac_mode=self.op.evac_mode)
13522
    else:
13523
      raise errors.ProgrammerError("Uncatched mode %s in"
13524
                                   " LUTestAllocator.Exec", self.op.mode)
13525

    
13526
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13527
      result = ial.in_text
13528
    else:
13529
      ial.Run(self.op.allocator, validate=False)
13530
      result = ial.out_text
13531
    return result
13532

    
13533

    
13534
#: Query type implementations
13535
_QUERY_IMPL = {
13536
  constants.QR_INSTANCE: _InstanceQuery,
13537
  constants.QR_NODE: _NodeQuery,
13538
  constants.QR_GROUP: _GroupQuery,
13539
  constants.QR_OS: _OsQuery,
13540
  }
13541

    
13542
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13543

    
13544

    
13545
def _GetQueryImplementation(name):
13546
  """Returns the implemtnation for a query type.
13547

13548
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
13549

13550
  """
13551
  try:
13552
    return _QUERY_IMPL[name]
13553
  except KeyError:
13554
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13555
                               errors.ECODE_INVAL)
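
# Illustrative example only: _GetQueryImplementation(constants.QR_GROUP)
# returns the _GroupQuery class defined earlier in this module, while an
# unknown resource name raises OpPrereqError with ECODE_INVAL.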