Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ c85b15c1

History | View | Annotate | Download (477.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay to many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import logging
36
import copy
37
import OpenSSL
38
import socket
39
import tempfile
40
import shutil
41
import itertools
42
import operator
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60
from ganeti import ht
61
from ganeti import runtime
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcode.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
137
    self.LogStep = processor.LogStep # pylint: disable=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensure
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separate is better because:
160

161
      - ExpandNames is left as as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possible
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods can no longer worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. No nodes should be returned as an
297
      empty list (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged but any LU can define it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the unused argument and could
324
    # be a function warnings
325
    # pylint: disable=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instance's nodes, or
359
    to just lock primaries or secondary nodes, if needed.
360

361
    If should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we're really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
558

    
559

    
560
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
561
                              cur_group_uuid):
562
  """Checks if node groups for locked instances are still correct.
563

564
  @type cfg: L{config.ConfigWriter}
565
  @param cfg: Cluster configuration
566
  @type instances: dict; string as key, L{objects.Instance} as value
567
  @param instances: Dictionary, instance name as key, instance object as value
568
  @type owned_groups: iterable of string
569
  @param owned_groups: List of owned groups
570
  @type owned_nodes: iterable of string
571
  @param owned_nodes: List of owned nodes
572
  @type cur_group_uuid: string or None
573
  @type cur_group_uuid: Optional group UUID to check against instance's groups
574

575
  """
576
  for (name, inst) in instances.items():
577
    assert owned_nodes.issuperset(inst.all_nodes), \
578
      "Instance %s's nodes changed while we kept the lock" % name
579

    
580
    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
581

    
582
    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
583
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
584

    
585

    
586
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
587
  """Checks if the owned node groups are still correct for an instance.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type instance_name: string
592
  @param instance_name: Instance name
593
  @type owned_groups: set or frozenset
594
  @param owned_groups: List of currently owned node groups
595

596
  """
597
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
598

    
599
  if not owned_groups.issuperset(inst_groups):
600
    raise errors.OpPrereqError("Instance %s's node groups changed since"
601
                               " locks were acquired, current groups are"
602
                               " are '%s', owning groups '%s'; retry the"
603
                               " operation" %
604
                               (instance_name,
605
                                utils.CommaJoin(inst_groups),
606
                                utils.CommaJoin(owned_groups)),
607
                               errors.ECODE_STATE)
608

    
609
  return inst_groups
610

    
611

    
612
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
613
  """Checks if the instances in a node group are still correct.
614

615
  @type cfg: L{config.ConfigWriter}
616
  @param cfg: The cluster configuration
617
  @type group_uuid: string
618
  @param group_uuid: Node group UUID
619
  @type owned_instances: set or frozenset
620
  @param owned_instances: List of currently owned instances
621

622
  """
623
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
624
  if owned_instances != wanted_instances:
625
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
626
                               " locks were acquired, wanted '%s', have '%s';"
627
                               " retry the operation" %
628
                               (group_uuid,
629
                                utils.CommaJoin(wanted_instances),
630
                                utils.CommaJoin(owned_instances)),
631
                               errors.ECODE_STATE)
632

    
633
  return wanted_instances
634

    
635

    
636
def _SupportsOob(cfg, node):
637
  """Tells if node supports OOB.
638

639
  @type cfg: L{config.ConfigWriter}
640
  @param cfg: The cluster configuration
641
  @type node: L{objects.Node}
642
  @param node: The node
643
  @return: The OOB script if supported or an empty string otherwise
644

645
  """
646
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
647

    
648

    
649
def _GetWantedNodes(lu, nodes):
650
  """Returns list of checked and expanded node names.
651

652
  @type lu: L{LogicalUnit}
653
  @param lu: the logical unit on whose behalf we execute
654
  @type nodes: list
655
  @param nodes: list of node names or None for all nodes
656
  @rtype: list
657
  @return: the list of nodes, sorted
658
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
659

660
  """
661
  if nodes:
662
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
663

    
664
  return utils.NiceSort(lu.cfg.GetNodeList())
665

    
666

    
667
def _GetWantedInstances(lu, instances):
668
  """Returns list of checked and expanded instance names.
669

670
  @type lu: L{LogicalUnit}
671
  @param lu: the logical unit on whose behalf we execute
672
  @type instances: list
673
  @param instances: list of instance names or None for all instances
674
  @rtype: list
675
  @return: the list of instances, sorted
676
  @raise errors.OpPrereqError: if the instances parameter is wrong type
677
  @raise errors.OpPrereqError: if any of the passed instances is not found
678

679
  """
680
  if instances:
681
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
682
  else:
683
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
684
  return wanted
685

    
686

    
687
def _GetUpdatedParams(old_params, update_dict,
688
                      use_default=True, use_none=False):
689
  """Return the new version of a parameter dictionary.
690

691
  @type old_params: dict
692
  @param old_params: old parameters
693
  @type update_dict: dict
694
  @param update_dict: dict containing new parameter values, or
695
      constants.VALUE_DEFAULT to reset the parameter to its default
696
      value
697
  @param use_default: boolean
698
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
699
      values as 'to be deleted' values
700
  @param use_none: boolean
701
  @type use_none: whether to recognise C{None} values as 'to be
702
      deleted' values
703
  @rtype: dict
704
  @return: the new parameter dictionary
705

706
  """
707
  params_copy = copy.deepcopy(old_params)
708
  for key, val in update_dict.iteritems():
709
    if ((use_default and val == constants.VALUE_DEFAULT) or
710
        (use_none and val is None)):
711
      try:
712
        del params_copy[key]
713
      except KeyError:
714
        pass
715
    else:
716
      params_copy[key] = val
717
  return params_copy
718

    
719

    
720
def _ReleaseLocks(lu, level, names=None, keep=None):
721
  """Releases locks owned by an LU.
722

723
  @type lu: L{LogicalUnit}
724
  @param level: Lock level
725
  @type names: list or None
726
  @param names: Names of locks to release
727
  @type keep: list or None
728
  @param keep: Names of locks to retain
729

730
  """
731
  assert not (keep is not None and names is not None), \
732
         "Only one of the 'names' and the 'keep' parameters can be given"
733

    
734
  if names is not None:
735
    should_release = names.__contains__
736
  elif keep:
737
    should_release = lambda name: name not in keep
738
  else:
739
    should_release = None
740

    
741
  if should_release:
742
    retain = []
743
    release = []
744

    
745
    # Determine which locks to release
746
    for name in lu.owned_locks(level):
747
      if should_release(name):
748
        release.append(name)
749
      else:
750
        retain.append(name)
751

    
752
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
753

    
754
    # Release just some locks
755
    lu.glm.release(level, names=release)
756

    
757
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
758
  else:
759
    # Release everything
760
    lu.glm.release(level)
761

    
762
    assert not lu.glm.is_owned(level), "No locks should be owned"
763

    
764

    
765
def _MapInstanceDisksToNodes(instances):
766
  """Creates a map from (node, volume) to instance name.
767

768
  @type instances: list of L{objects.Instance}
769
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
770

771
  """
772
  return dict(((node, vol), inst.name)
773
              for inst in instances
774
              for (node, vols) in inst.MapLVsByNode().items()
775
              for vol in vols)
776

    
777

    
778
def _RunPostHook(lu, node_name):
779
  """Runs the post-hook for an opcode on a single node.
780

781
  """
782
  hm = lu.proc.BuildHooksManager(lu)
783
  try:
784
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
785
  except:
786
    # pylint: disable=W0702
787
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
788

    
789

    
790
def _CheckOutputFields(static, dynamic, selected):
791
  """Checks whether all selected fields are valid.
792

793
  @type static: L{utils.FieldSet}
794
  @param static: static fields set
795
  @type dynamic: L{utils.FieldSet}
796
  @param dynamic: dynamic fields set
797

798
  """
799
  f = utils.FieldSet()
800
  f.Extend(static)
801
  f.Extend(dynamic)
802

    
803
  delta = f.NonMatching(selected)
804
  if delta:
805
    raise errors.OpPrereqError("Unknown output fields selected: %s"
806
                               % ",".join(delta), errors.ECODE_INVAL)
807

    
808

    
809
def _CheckGlobalHvParams(params):
810
  """Validates that given hypervisor params are not global ones.
811

812
  This will ensure that instances don't get customised versions of
813
  global params.
814

815
  """
816
  used_globals = constants.HVC_GLOBALS.intersection(params)
817
  if used_globals:
818
    msg = ("The following hypervisor parameters are global and cannot"
819
           " be customized at instance level, please modify them at"
820
           " cluster level: %s" % utils.CommaJoin(used_globals))
821
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
822

    
823

    
824
def _CheckNodeOnline(lu, node, msg=None):
825
  """Ensure that a given node is online.
826

827
  @param lu: the LU on behalf of which we make the check
828
  @param node: the node to check
829
  @param msg: if passed, should be a message to replace the default one
830
  @raise errors.OpPrereqError: if the node is offline
831

832
  """
833
  if msg is None:
834
    msg = "Can't use offline node"
835
  if lu.cfg.GetNodeInfo(node).offline:
836
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeNotDrained(lu, node):
840
  """Ensure that a given node is not drained.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @raise errors.OpPrereqError: if the node is drained
845

846
  """
847
  if lu.cfg.GetNodeInfo(node).drained:
848
    raise errors.OpPrereqError("Can't use drained node %s" % node,
849
                               errors.ECODE_STATE)
850

    
851

    
852
def _CheckNodeVmCapable(lu, node):
853
  """Ensure that a given node is vm capable.
854

855
  @param lu: the LU on behalf of which we make the check
856
  @param node: the node to check
857
  @raise errors.OpPrereqError: if the node is not vm capable
858

859
  """
860
  if not lu.cfg.GetNodeInfo(node).vm_capable:
861
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
862
                               errors.ECODE_STATE)
863

    
864

    
865
def _CheckNodeHasOS(lu, node, os_name, force_variant):
866
  """Ensure that a node supports a given OS.
867

868
  @param lu: the LU on behalf of which we make the check
869
  @param node: the node to check
870
  @param os_name: the OS to query about
871
  @param force_variant: whether to ignore variant errors
872
  @raise errors.OpPrereqError: if the node is not supporting the OS
873

874
  """
875
  result = lu.rpc.call_os_get(node, os_name)
876
  result.Raise("OS '%s' not in supported OS list for node %s" %
877
               (os_name, node),
878
               prereq=True, ecode=errors.ECODE_INVAL)
879
  if not force_variant:
880
    _CheckOSVariant(result.payload, os_name)
881

    
882

    
883
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
884
  """Ensure that a node has the given secondary ip.
885

886
  @type lu: L{LogicalUnit}
887
  @param lu: the LU on behalf of which we make the check
888
  @type node: string
889
  @param node: the node to check
890
  @type secondary_ip: string
891
  @param secondary_ip: the ip to check
892
  @type prereq: boolean
893
  @param prereq: whether to throw a prerequisite or an execute error
894
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
895
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
896

897
  """
898
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
899
  result.Raise("Failure checking secondary ip on node %s" % node,
900
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
901
  if not result.payload:
902
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
903
           " please fix and re-run this command" % secondary_ip)
904
    if prereq:
905
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
906
    else:
907
      raise errors.OpExecError(msg)
908

    
909

    
910
def _GetClusterDomainSecret():
911
  """Reads the cluster domain secret.
912

913
  """
914
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
915
                               strict=True)
916

    
917

    
918
def _CheckInstanceDown(lu, instance, reason):
919
  """Ensure that an instance is not running."""
920
  if instance.admin_up:
921
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
922
                               (instance.name, reason), errors.ECODE_STATE)
923

    
924
  pnode = instance.primary_node
925
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
926
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
927
              prereq=True, ecode=errors.ECODE_ENVIRON)
928

    
929
  if instance.name in ins_l.payload:
930
    raise errors.OpPrereqError("Instance %s is running, %s" %
931
                               (instance.name, reason), errors.ECODE_STATE)
932

    
933

    
934
def _ExpandItemName(fn, name, kind):
935
  """Expand an item name.
936

937
  @param fn: the function to use for expansion
938
  @param name: requested item name
939
  @param kind: text description ('Node' or 'Instance')
940
  @return: the resolved (full) name
941
  @raise errors.OpPrereqError: if the item is not found
942

943
  """
944
  full_name = fn(name)
945
  if full_name is None:
946
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
947
                               errors.ECODE_NOENT)
948
  return full_name
949

    
950

    
951
def _ExpandNodeName(cfg, name):
952
  """Wrapper over L{_ExpandItemName} for nodes."""
953
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
954

    
955

    
956
def _ExpandInstanceName(cfg, name):
957
  """Wrapper over L{_ExpandItemName} for instance."""
958
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
959

    
960

    
961
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
962
                          memory, vcpus, nics, disk_template, disks,
963
                          bep, hvp, hypervisor_name, tags):
964
  """Builds instance related env variables for hooks
965

966
  This builds the hook environment from individual variables.
967

968
  @type name: string
969
  @param name: the name of the instance
970
  @type primary_node: string
971
  @param primary_node: the name of the instance's primary node
972
  @type secondary_nodes: list
973
  @param secondary_nodes: list of secondary nodes as strings
974
  @type os_type: string
975
  @param os_type: the name of the instance's OS
976
  @type status: boolean
977
  @param status: the should_run status of the instance
978
  @type memory: string
979
  @param memory: the memory size of the instance
980
  @type vcpus: string
981
  @param vcpus: the count of VCPUs the instance has
982
  @type nics: list
983
  @param nics: list of tuples (ip, mac, mode, link) representing
984
      the NICs the instance has
985
  @type disk_template: string
986
  @param disk_template: the disk template of the instance
987
  @type disks: list
988
  @param disks: the list of (size, mode) pairs
989
  @type bep: dict
990
  @param bep: the backend parameters for the instance
991
  @type hvp: dict
992
  @param hvp: the hypervisor parameters for the instance
993
  @type hypervisor_name: string
994
  @param hypervisor_name: the hypervisor for the instance
995
  @type tags: list
996
  @param tags: list of instance tags as strings
997
  @rtype: dict
998
  @return: the hook environment for this instance
999

1000
  """
1001
  if status:
1002
    str_status = "up"
1003
  else:
1004
    str_status = "down"
1005
  env = {
1006
    "OP_TARGET": name,
1007
    "INSTANCE_NAME": name,
1008
    "INSTANCE_PRIMARY": primary_node,
1009
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1010
    "INSTANCE_OS_TYPE": os_type,
1011
    "INSTANCE_STATUS": str_status,
1012
    "INSTANCE_MEMORY": memory,
1013
    "INSTANCE_VCPUS": vcpus,
1014
    "INSTANCE_DISK_TEMPLATE": disk_template,
1015
    "INSTANCE_HYPERVISOR": hypervisor_name,
1016
  }
1017

    
1018
  if nics:
1019
    nic_count = len(nics)
1020
    for idx, (ip, mac, mode, link) in enumerate(nics):
1021
      if ip is None:
1022
        ip = ""
1023
      env["INSTANCE_NIC%d_IP" % idx] = ip
1024
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1025
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1026
      env["INSTANCE_NIC%d_LINK" % idx] = link
1027
      if mode == constants.NIC_MODE_BRIDGED:
1028
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1029
  else:
1030
    nic_count = 0
1031

    
1032
  env["INSTANCE_NIC_COUNT"] = nic_count
1033

    
1034
  if disks:
1035
    disk_count = len(disks)
1036
    for idx, (size, mode) in enumerate(disks):
1037
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1038
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1039
  else:
1040
    disk_count = 0
1041

    
1042
  env["INSTANCE_DISK_COUNT"] = disk_count
1043

    
1044
  if not tags:
1045
    tags = []
1046

    
1047
  env["INSTANCE_TAGS"] = " ".join(tags)
1048

    
1049
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1050
    for key, value in source.items():
1051
      env["INSTANCE_%s_%s" % (kind, key)] = value
1052

    
1053
  return env
1054

    
1055

    
1056
def _NICListToTuple(lu, nics):
1057
  """Build a list of nic information tuples.
1058

1059
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1060
  value in LUInstanceQueryData.
1061

1062
  @type lu:  L{LogicalUnit}
1063
  @param lu: the logical unit on whose behalf we execute
1064
  @type nics: list of L{objects.NIC}
1065
  @param nics: list of nics to convert to hooks tuples
1066

1067
  """
1068
  hooks_nics = []
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  for nic in nics:
1071
    ip = nic.ip
1072
    mac = nic.mac
1073
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1074
    mode = filled_params[constants.NIC_MODE]
1075
    link = filled_params[constants.NIC_LINK]
1076
    hooks_nics.append((ip, mac, mode, link))
1077
  return hooks_nics
1078

    
1079

    
1080
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1081
  """Builds instance related env variables for hooks from an object.
1082

1083
  @type lu: L{LogicalUnit}
1084
  @param lu: the logical unit on whose behalf we execute
1085
  @type instance: L{objects.Instance}
1086
  @param instance: the instance for which we should build the
1087
      environment
1088
  @type override: dict
1089
  @param override: dictionary with key/values that will override
1090
      our values
1091
  @rtype: dict
1092
  @return: the hook environment dictionary
1093

1094
  """
1095
  cluster = lu.cfg.GetClusterInfo()
1096
  bep = cluster.FillBE(instance)
1097
  hvp = cluster.FillHV(instance)
1098
  args = {
1099
    "name": instance.name,
1100
    "primary_node": instance.primary_node,
1101
    "secondary_nodes": instance.secondary_nodes,
1102
    "os_type": instance.os,
1103
    "status": instance.admin_up,
1104
    "memory": bep[constants.BE_MEMORY],
1105
    "vcpus": bep[constants.BE_VCPUS],
1106
    "nics": _NICListToTuple(lu, instance.nics),
1107
    "disk_template": instance.disk_template,
1108
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1109
    "bep": bep,
1110
    "hvp": hvp,
1111
    "hypervisor_name": instance.hypervisor,
1112
    "tags": instance.tags,
1113
  }
1114
  if override:
1115
    args.update(override)
1116
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1117

    
1118

    
1119
def _AdjustCandidatePool(lu, exceptions):
1120
  """Adjust the candidate pool after node operations.
1121

1122
  """
1123
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1124
  if mod_list:
1125
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1126
               utils.CommaJoin(node.name for node in mod_list))
1127
    for name in mod_list:
1128
      lu.context.ReaddNode(name)
1129
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1130
  if mc_now > mc_max:
1131
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1132
               (mc_now, mc_max))
1133

    
1134

    
1135
def _DecideSelfPromotion(lu, exceptions=None):
1136
  """Decide whether I should promote myself as a master candidate.
1137

1138
  """
1139
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1140
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1141
  # the new node will increase mc_max with one, so:
1142
  mc_should = min(mc_should + 1, cp_size)
1143
  return mc_now < mc_should
1144

    
1145

    
1146
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1147
  """Check that the brigdes needed by a list of nics exist.
1148

1149
  """
1150
  cluster = lu.cfg.GetClusterInfo()
1151
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1152
  brlist = [params[constants.NIC_LINK] for params in paramslist
1153
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1154
  if brlist:
1155
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1156
    result.Raise("Error checking bridges on destination node '%s'" %
1157
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1158

    
1159

    
1160
def _CheckInstanceBridgesExist(lu, instance, node=None):
1161
  """Check that the brigdes needed by an instance exist.
1162

1163
  """
1164
  if node is None:
1165
    node = instance.primary_node
1166
  _CheckNicsBridgesExist(lu, instance.nics, node)
1167

    
1168

    
1169
def _CheckOSVariant(os_obj, name):
1170
  """Check whether an OS name conforms to the os variants specification.
1171

1172
  @type os_obj: L{objects.OS}
1173
  @param os_obj: OS object to check
1174
  @type name: string
1175
  @param name: OS name passed by the user, to check for validity
1176

1177
  """
1178
  variant = objects.OS.GetVariant(name)
1179
  if not os_obj.supported_variants:
1180
    if variant:
1181
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1182
                                 " passed)" % (os_obj.name, variant),
1183
                                 errors.ECODE_INVAL)
1184
    return
1185
  if not variant:
1186
    raise errors.OpPrereqError("OS name must include a variant",
1187
                               errors.ECODE_INVAL)
1188

    
1189
  if variant not in os_obj.supported_variants:
1190
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1191

    
1192

    
1193
def _GetNodeInstancesInner(cfg, fn):
1194
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1195

    
1196

    
1197
def _GetNodeInstances(cfg, node_name):
1198
  """Returns a list of all primary and secondary instances on a node.
1199

1200
  """
1201

    
1202
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1203

    
1204

    
1205
def _GetNodePrimaryInstances(cfg, node_name):
1206
  """Returns primary instances on a node.
1207

1208
  """
1209
  return _GetNodeInstancesInner(cfg,
1210
                                lambda inst: node_name == inst.primary_node)
1211

    
1212

    
1213
def _GetNodeSecondaryInstances(cfg, node_name):
1214
  """Returns secondary instances on a node.
1215

1216
  """
1217
  return _GetNodeInstancesInner(cfg,
1218
                                lambda inst: node_name in inst.secondary_nodes)
1219

    
1220

    
1221
def _GetStorageTypeArgs(cfg, storage_type):
1222
  """Returns the arguments for a storage type.
1223

1224
  """
1225
  # Special case for file storage
1226
  if storage_type == constants.ST_FILE:
1227
    # storage.FileStorage wants a list of storage directories
1228
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1229

    
1230
  return []
1231

    
1232

    
1233
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1234
  faulty = []
1235

    
1236
  for dev in instance.disks:
1237
    cfg.SetDiskID(dev, node_name)
1238

    
1239
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1240
  result.Raise("Failed to get disk status from node %s" % node_name,
1241
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1242

    
1243
  for idx, bdev_status in enumerate(result.payload):
1244
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1245
      faulty.append(idx)
1246

    
1247
  return faulty
1248

    
1249

    
1250
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1251
  """Check the sanity of iallocator and node arguments and use the
1252
  cluster-wide iallocator if appropriate.
1253

1254
  Check that at most one of (iallocator, node) is specified. If none is
1255
  specified, then the LU's opcode's iallocator slot is filled with the
1256
  cluster-wide default iallocator.
1257

1258
  @type iallocator_slot: string
1259
  @param iallocator_slot: the name of the opcode iallocator slot
1260
  @type node_slot: string
1261
  @param node_slot: the name of the opcode target node slot
1262

1263
  """
1264
  node = getattr(lu.op, node_slot, None)
1265
  iallocator = getattr(lu.op, iallocator_slot, None)
1266

    
1267
  if node is not None and iallocator is not None:
1268
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1269
                               errors.ECODE_INVAL)
1270
  elif node is None and iallocator is None:
1271
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1272
    if default_iallocator:
1273
      setattr(lu.op, iallocator_slot, default_iallocator)
1274
    else:
1275
      raise errors.OpPrereqError("No iallocator or node given and no"
1276
                                 " cluster-wide default iallocator found;"
1277
                                 " please specify either an iallocator or a"
1278
                                 " node, or set a cluster-wide default"
1279
                                 " iallocator")
1280

    
1281

    
1282
def _GetDefaultIAllocator(cfg, iallocator):
1283
  """Decides on which iallocator to use.
1284

1285
  @type cfg: L{config.ConfigWriter}
1286
  @param cfg: Cluster configuration object
1287
  @type iallocator: string or None
1288
  @param iallocator: Iallocator specified in opcode
1289
  @rtype: string
1290
  @return: Iallocator name
1291

1292
  """
1293
  if not iallocator:
1294
    # Use default iallocator
1295
    iallocator = cfg.GetDefaultIAllocator()
1296

    
1297
  if not iallocator:
1298
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1299
                               " opcode nor as a cluster-wide default",
1300
                               errors.ECODE_INVAL)
1301

    
1302
  return iallocator
1303

    
1304

    
1305
class LUClusterPostInit(LogicalUnit):
1306
  """Logical unit for running hooks after cluster initialization.
1307

1308
  """
1309
  HPATH = "cluster-init"
1310
  HTYPE = constants.HTYPE_CLUSTER
1311

    
1312
  def BuildHooksEnv(self):
1313
    """Build hooks env.
1314

1315
    """
1316
    return {
1317
      "OP_TARGET": self.cfg.GetClusterName(),
1318
      }
1319

    
1320
  def BuildHooksNodes(self):
1321
    """Build hooks nodes.
1322

1323
    """
1324
    return ([], [self.cfg.GetMasterNode()])
1325

    
1326
  def Exec(self, feedback_fn):
1327
    """Nothing to do.
1328

1329
    """
1330
    return True
1331

    
1332

    
1333
class LUClusterDestroy(LogicalUnit):
1334
  """Logical unit for destroying the cluster.
1335

1336
  """
1337
  HPATH = "cluster-destroy"
1338
  HTYPE = constants.HTYPE_CLUSTER
1339

    
1340
  def BuildHooksEnv(self):
1341
    """Build hooks env.
1342

1343
    """
1344
    return {
1345
      "OP_TARGET": self.cfg.GetClusterName(),
1346
      }
1347

    
1348
  def BuildHooksNodes(self):
1349
    """Build hooks nodes.
1350

1351
    """
1352
    return ([], [])
1353

    
1354
  def CheckPrereq(self):
1355
    """Check prerequisites.
1356

1357
    This checks whether the cluster is empty.
1358

1359
    Any errors are signaled by raising errors.OpPrereqError.
1360

1361
    """
1362
    master = self.cfg.GetMasterNode()
1363

    
1364
    nodelist = self.cfg.GetNodeList()
1365
    if len(nodelist) != 1 or nodelist[0] != master:
1366
      raise errors.OpPrereqError("There are still %d node(s) in"
1367
                                 " this cluster." % (len(nodelist) - 1),
1368
                                 errors.ECODE_INVAL)
1369
    instancelist = self.cfg.GetInstanceList()
1370
    if instancelist:
1371
      raise errors.OpPrereqError("There are still %d instance(s) in"
1372
                                 " this cluster." % len(instancelist),
1373
                                 errors.ECODE_INVAL)
1374

    
1375
  def Exec(self, feedback_fn):
1376
    """Destroys the cluster.
1377

1378
    """
1379
    master = self.cfg.GetMasterNode()
1380

    
1381
    # Run post hooks on master node before it's removed
1382
    _RunPostHook(self, master)
1383

    
1384
    result = self.rpc.call_node_deactivate_master_ip(master)
1385
    result.Raise("Could not disable the master role")
1386

    
1387
    return master
1388

    
1389

    
1390
def _VerifyCertificate(filename):
1391
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1392

1393
  @type filename: string
1394
  @param filename: Path to PEM file
1395

1396
  """
1397
  try:
1398
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1399
                                           utils.ReadFile(filename))
1400
  except Exception, err: # pylint: disable=W0703
1401
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1402
            "Failed to load X509 certificate %s: %s" % (filename, err))
1403

    
1404
  (errcode, msg) = \
1405
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1406
                                constants.SSL_CERT_EXPIRATION_ERROR)
1407

    
1408
  if msg:
1409
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1410
  else:
1411
    fnamemsg = None
1412

    
1413
  if errcode is None:
1414
    return (None, fnamemsg)
1415
  elif errcode == utils.CERT_WARNING:
1416
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1417
  elif errcode == utils.CERT_ERROR:
1418
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1419

    
1420
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1421

    
1422

    
1423
def _GetAllHypervisorParameters(cluster, instances):
1424
  """Compute the set of all hypervisor parameters.
1425

1426
  @type cluster: L{objects.Cluster}
1427
  @param cluster: the cluster object
1428
  @param instances: list of L{objects.Instance}
1429
  @param instances: additional instances from which to obtain parameters
1430
  @rtype: list of (origin, hypervisor, parameters)
1431
  @return: a list with all parameters found, indicating the hypervisor they
1432
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1433

1434
  """
1435
  hvp_data = []
1436

    
1437
  for hv_name in cluster.enabled_hypervisors:
1438
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1439

    
1440
  for os_name, os_hvp in cluster.os_hvp.items():
1441
    for hv_name, hv_params in os_hvp.items():
1442
      if hv_params:
1443
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1444
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1445

    
1446
  # TODO: collapse identical parameter values in a single one
1447
  for instance in instances:
1448
    if instance.hvparams:
1449
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1450
                       cluster.FillHV(instance)))
1451

    
1452
  return hvp_data
1453

    
1454

    
1455
class _VerifyErrors(object):
1456
  """Mix-in for cluster/group verify LUs.
1457

1458
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1459
  self.op and self._feedback_fn to be available.)
1460

1461
  """
1462
  TCLUSTER = "cluster"
1463
  TNODE = "node"
1464
  TINSTANCE = "instance"
1465

    
1466
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1467
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1468
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1469
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1470
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1471
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1472
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1473
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1474
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1475
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1476
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1477
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1478
  ENODEDRBD = (TNODE, "ENODEDRBD")
1479
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1480
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1481
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1482
  ENODEHV = (TNODE, "ENODEHV")
1483
  ENODELVM = (TNODE, "ENODELVM")
1484
  ENODEN1 = (TNODE, "ENODEN1")
1485
  ENODENET = (TNODE, "ENODENET")
1486
  ENODEOS = (TNODE, "ENODEOS")
1487
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1488
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1489
  ENODERPC = (TNODE, "ENODERPC")
1490
  ENODESSH = (TNODE, "ENODESSH")
1491
  ENODEVERSION = (TNODE, "ENODEVERSION")
1492
  ENODESETUP = (TNODE, "ENODESETUP")
1493
  ENODETIME = (TNODE, "ENODETIME")
1494
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1495

    
1496
  ETYPE_FIELD = "code"
1497
  ETYPE_ERROR = "ERROR"
1498
  ETYPE_WARNING = "WARNING"
1499

    
1500
  def _Error(self, ecode, item, msg, *args, **kwargs):
1501
    """Format an error message.
1502

1503
    Based on the opcode's error_codes parameter, either format a
1504
    parseable error code, or a simpler error string.
1505

1506
    This must be called only from Exec and functions called from Exec.
1507

1508
    """
1509
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1510
    itype, etxt = ecode
1511
    # first complete the msg
1512
    if args:
1513
      msg = msg % args
1514
    # then format the whole message
1515
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1516
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1517
    else:
1518
      if item:
1519
        item = " " + item
1520
      else:
1521
        item = ""
1522
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1523
    # and finally report it via the feedback_fn
1524
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
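    # Sketch of the resulting submission for a cluster with two node groups
    # (hypothetical group names); the negative, relative job dependencies all
    # point back at the config-verification job submitted first:
    #   [[OpClusterVerifyConfig()],
    #    [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])])],
    #    [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])])]]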


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
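    # Illustrative layout of hvp_data as produced by
    # _GetAllHypervisorParameters: a list of (source label, hypervisor name,
    # parameter dict) tuples, e.g. (the OS and instance names are
    # hypothetical)
    #   [("cluster", "xen-pvm", {...}),
    #    ("os debian-image", "xen-pvm", {...}),
    #    ("instance inst1.example.com", "kvm", {...})]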

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
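    # Illustrative example (hypothetical names): if node "node9" references a
    # group UUID that no longer exists and hosts instance "inst1", the
    # ECLUSTERDANGLINGNODES message lists it as "node9 (inst1)"; an instance
    # whose primary node is missing from the configuration entirely is
    # reported via ECLUSTERDANGLINGINST instead.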


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)
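    # Worked example (illustrative): assuming constants.NODE_MAX_CLOCK_SKEW is
    # 150 seconds and the verify RPC ran in the window [T, T + 2], a node
    # whose merged time is T - 200 yields ntime_diff of "200.0s" and an
    # ENODETIME error, while anything within [T - 150, T + 152] is accepted.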

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
          self.all_node_info[node].group != self.group_uuid):
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
        # we're skipping nodes marked offline and nodes in other groups from
        # the N+1 warning, since most likely we don't have good memory
        # information from them; we already list instances living on such
        # nodes, and that's enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)
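    # Worked example (illustrative, hypothetical numbers): if this node is
    # secondary for instances inst1 (1024 MiB) and inst2 (2048 MiB), both with
    # primary node "node1" and auto_balance enabled, needed_mem for the
    # "node1" entry of sbp is 3072 MiB, and an ENODEN1 error is raised when
    # the node reports less free memory than that.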

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        # Non-optional files
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                cls.ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))
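    # Illustrative shape of the intermediate structures (hypothetical values):
    #   nodefiles = {"/var/lib/ganeti/config.data": frozenset(["node1", ...])}
    #   fileinfo["/var/lib/ganeti/config.data"] = {
    #     "3d5a...90ab": set(["node1"]),
    #     "77f0...12cd": set(["node2"]),
    #     }
    # Two different checksums for the same file trigger an ECLUSTERFILECHECK
    # error listing both variants and the nodes holding them.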

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict
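    # Illustrative shape of nimg.oslist (hypothetical OS name, path and API
    # version):
    #   {"debootstrap": [("/srv/ganeti/os/debootstrap", True, "",
    #                     set(["default"]), set(), set([20]))]}
    # i.e. one entry per OS name, each a list of
    # (path, status, diagnose, variants, parameters, api_versions) tuples.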

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
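    # Illustrative shape of the returned mapping (hypothetical names): a
    # two-disk mirrored instance on node1/node2 plus a diskless instance gives
    #   {"inst1": {"node1": [(True, status0), (True, status1)],
    #              "node2": [(True, status0), (True, status1)]},
    #    "diskless1": {}}
    # where each status is the per-disk payload returned by the
    # blockdev_getmirrorstatus_multi RPC call above.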

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))
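    # Illustrative example (hypothetical node names): for a group containing
    # node1 and node2, with two other groups whose members are [b1, b2] and
    # [c1], the return value is roughly
    #   (["node1", "node2"],
    #    {"node1": ["b1", "c1"], "node2": ["b2", "c1"]})
    # i.e. each online node is asked to contact one node per other group,
    # cycling through that group's members.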

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2857

    
2858
      msg = all_nvinfo[node].fail_msg
2859
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2860
      if msg:
2861
        nimg.rpc_fail = True
2862
        continue
2863

    
2864
      nresult = all_nvinfo[node].payload
2865

    
2866
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2867
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2868
      self._VerifyNodeNetwork(node_i, nresult)
2869
      self._VerifyOob(node_i, nresult)
2870

    
2871
      if nimg.vm_capable:
2872
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2873
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2874
                             all_drbd_map)
2875

    
2876
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2877
        self._UpdateNodeInstances(node_i, nresult, nimg)
2878
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2879
        self._UpdateNodeOS(node_i, nresult, nimg)
2880

    
2881
        if not nimg.os_fail:
2882
          if refos_img is None:
2883
            refos_img = nimg
2884
          self._VerifyNodeOS(node_i, nimg, refos_img)
2885
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result.

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
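    # Each entry in hooks_results maps a node name to an RPC result whose
    # payload is a list of (script, status, output) tuples, for example
    # ("50-check-foo", constants.HKR_FAIL, "disk full") - names illustrative.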
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
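    # (e.g. for groups "default" and "rack1" this yields two single-opcode
    # jobs, [[OpGroupVerifyDisks(group_name="default")],
    # [OpGroupVerifyDisks(group_name="rack1")]]; group names are illustrative)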
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
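    # Illustrative shape of the return value (node/instance names made up):
    #   ({"node3.example.com": "Error while enumerating LVs"},
    #    ["instance7"],
    #    {"instance9": [["node4.example.com", "xenvg/disk0"]]})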
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
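        # The sizes reported by blockdev_getsize appear to be in bytes,
        # while disk.size is kept in MiB, hence the shift by 20 bits below.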
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_activate_master_ip(master)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)
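    # (illustrative: merging {"xen-pvm": {"use_bootloader": False}} on top of
    # the existing hvparams only overrides that one key for xen-pvm and leaves
    # every other hypervisor parameter untouched)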

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_activate_master_ip(master)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
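  # Note that a file missing on the master is silently skipped, and per-node
  # upload failures are only logged as warnings so distribution continues for
  # the remaining nodes.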
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

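  # The tuple below is (files_all, files_opt, files_mc, files_vm); roughly,
  # e.g. known_hosts and the confd HMAC key land in files_all, the RAPI users
  # file in files_opt and config.data in files_mc (when redist is False).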
  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_activate_master_ip(master)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
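  # Typical (illustrative) call from an LU would be _WaitForSync(self,
  # instance); the function blocks until mirror resync finishes and returns
  # True iff no disk is left degraded, while oneshot=True makes it report the
  # current status after a single poll (plus a few retries while degraded).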
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
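    # A well-formed payload for "health" looks roughly like (illustrative):
    #   [["fan-status", "OK"], ["psu-status", "WARNING"]]
    # while "power-status" is expected to return a dict (see checks below).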
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
4257
    """Computes the list of nodes and their attributes.
4258

4259
    """
4260
    # Locking is not used
4261
    assert not (compat.any(lu.glm.is_owned(level)
4262
                           for level in locking.LEVELS
4263
                           if level != locking.LEVEL_CLUSTER) or
4264
                self.do_locking or self.use_locking)
4265

    
4266
    valid_nodes = [node.name
4267
                   for node in lu.cfg.GetAllNodesInfo().values()
4268
                   if not node.offline and node.vm_capable]
4269
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4270
    cluster = lu.cfg.GetClusterInfo()
4271

    
4272
    data = {}
4273

    
4274
    for (os_name, os_data) in pol.items():
4275
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4276
                          hidden=(os_name in cluster.hidden_os),
4277
                          blacklisted=(os_name in cluster.blacklisted_os))
4278

    
4279
      variants = set()
4280
      parameters = set()
4281
      api_versions = set()
4282

    
4283
      for idx, osl in enumerate(os_data.values()):
4284
        info.valid = bool(info.valid and osl and osl[0][1])
4285
        if not info.valid:
4286
          break
4287

    
4288
        (node_variants, node_params, node_api) = osl[0][3:6]
4289
        if idx == 0:
4290
          # First entry
4291
          variants.update(node_variants)
4292
          parameters.update(node_params)
4293
          api_versions.update(node_api)
4294
        else:
4295
          # Filter out inconsistent values
4296
          variants.intersection_update(node_variants)
4297
          parameters.intersection_update(node_params)
4298
          api_versions.intersection_update(node_api)
4299

    
4300
      info.variants = list(variants)
4301
      info.parameters = list(parameters)
4302
      info.api_versions = list(api_versions)
4303

    
4304
      data[os_name] = info
4305

    
4306
    # Prepare data in requested order
4307
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4308
            if name in data]
4309

    
4310

    
4311
class LUOsDiagnose(NoHooksLU):
4312
  """Logical unit for OS diagnose/query.
4313

4314
  """
4315
  REQ_BGL = False
4316

    
4317
  @staticmethod
4318
  def _BuildFilter(fields, names):
4319
    """Builds a filter for querying OSes.
4320

4321
    """
4322
    name_filter = qlang.MakeSimpleFilter("name", names)
4323

    
4324
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4325
    # respective field is not requested
4326
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4327
                     for fname in ["hidden", "blacklisted"]
4328
                     if fname not in fields]
4329
    if "valid" not in fields:
4330
      status_filter.append([qlang.OP_TRUE, "valid"])
4331

    
4332
    if status_filter:
4333
      status_filter.insert(0, qlang.OP_AND)
4334
    else:
4335
      status_filter = None
4336

    
4337
    if name_filter and status_filter:
4338
      return [qlang.OP_AND, name_filter, status_filter]
4339
    elif name_filter:
4340
      return name_filter
4341
    else:
4342
      return status_filter
4343

    
4344
  def CheckArguments(self):
4345
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4346
                       self.op.output_fields, False)
4347

    
4348
  def ExpandNames(self):
4349
    self.oq.ExpandNames(self)
4350

    
4351
  def Exec(self, feedback_fn):
4352
    return self.oq.OldStyleQuery(self)
4353

    
4354

    
4355
class LUNodeRemove(LogicalUnit):
4356
  """Logical unit for removing a node.
4357

4358
  """
4359
  HPATH = "node-remove"
4360
  HTYPE = constants.HTYPE_NODE
4361

    
4362
  def BuildHooksEnv(self):
4363
    """Build hooks env.
4364

4365
    This doesn't run on the target node in the pre phase as a failed
4366
    node would then be impossible to remove.
4367

4368
    """
4369
    return {
4370
      "OP_TARGET": self.op.node_name,
4371
      "NODE_NAME": self.op.node_name,
4372
      }
4373

    
4374
  def BuildHooksNodes(self):
4375
    """Build hooks nodes.
4376

4377
    """
4378
    all_nodes = self.cfg.GetNodeList()
4379
    try:
4380
      all_nodes.remove(self.op.node_name)
4381
    except ValueError:
4382
      logging.warning("Node '%s', which is about to be removed, was not found"
4383
                      " in the list of all nodes", self.op.node_name)
4384
    return (all_nodes, all_nodes)
4385

    
4386
  def CheckPrereq(self):
4387
    """Check prerequisites.
4388

4389
    This checks:
4390
     - the node exists in the configuration
4391
     - it does not have primary or secondary instances
4392
     - it's not the master
4393

4394
    Any errors are signaled by raising errors.OpPrereqError.
4395

4396
    """
4397
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4398
    node = self.cfg.GetNodeInfo(self.op.node_name)
4399
    assert node is not None
4400

    
4401
    masternode = self.cfg.GetMasterNode()
4402
    if node.name == masternode:
4403
      raise errors.OpPrereqError("Node is the master node, failover to another"
4404
                                 " node is required", errors.ECODE_INVAL)
4405

    
4406
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4407
      if node.name in instance.all_nodes:
4408
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4409
                                   " please remove first" % instance_name,
4410
                                   errors.ECODE_INVAL)
4411
    self.op.node_name = node.name
4412
    self.node = node
4413

    
4414
  def Exec(self, feedback_fn):
4415
    """Removes the node from the cluster.
4416

4417
    """
4418
    node = self.node
4419
    logging.info("Stopping the node daemon and removing configs from node %s",
4420
                 node.name)
4421

    
4422
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4423

    
4424
    # Promote nodes to master candidate as needed
4425
    _AdjustCandidatePool(self, exceptions=[node.name])
4426
    self.context.RemoveNode(node.name)
4427

    
4428
    # Run post hooks on the node before it's removed
4429
    _RunPostHook(self, node.name)
4430

    
4431
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4432
    msg = result.fail_msg
4433
    if msg:
4434
      self.LogWarning("Errors encountered on the remote node while leaving"
4435
                      " the cluster: %s", msg)
4436

    
4437
    # Remove node from our /etc/hosts
4438
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4439
      master_node = self.cfg.GetMasterNode()
4440
      result = self.rpc.call_etc_hosts_modify(master_node,
4441
                                              constants.ETC_HOSTS_REMOVE,
4442
                                              node.name, None)
4443
      result.Raise("Can't update hosts file with new host data")
4444
      _RedistributeAncillaryFiles(self)
4445

    
4446

    
4447
class _NodeQuery(_QueryBase):
4448
  FIELDS = query.NODE_FIELDS
4449

    
4450
  def ExpandNames(self, lu):
4451
    lu.needed_locks = {}
4452
    lu.share_locks = _ShareAll()
4453

    
4454
    if self.names:
4455
      self.wanted = _GetWantedNodes(lu, self.names)
4456
    else:
4457
      self.wanted = locking.ALL_SET
4458

    
4459
    self.do_locking = (self.use_locking and
4460
                       query.NQ_LIVE in self.requested_data)
4461

    
4462
    if self.do_locking:
4463
      # If any non-static field is requested we need to lock the nodes
4464
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4465

    
4466
  def DeclareLocks(self, lu, level):
4467
    pass
4468

    
4469
  def _GetQueryData(self, lu):
4470
    """Computes the list of nodes and their attributes.
4471

4472
    """
4473
    all_info = lu.cfg.GetAllNodesInfo()
4474

    
4475
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4476

    
4477
    # Gather data as requested
4478
    if query.NQ_LIVE in self.requested_data:
4479
      # filter out non-vm_capable nodes
4480
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4481

    
4482
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4483
                                        lu.cfg.GetHypervisorType())
4484
      live_data = dict((name, nresult.payload)
4485
                       for (name, nresult) in node_data.items()
4486
                       if not nresult.fail_msg and nresult.payload)
4487
    else:
4488
      live_data = None
4489

    
4490
    if query.NQ_INST in self.requested_data:
4491
      node_to_primary = dict([(name, set()) for name in nodenames])
4492
      node_to_secondary = dict([(name, set()) for name in nodenames])
4493

    
4494
      inst_data = lu.cfg.GetAllInstancesInfo()
4495

    
4496
      for inst in inst_data.values():
4497
        if inst.primary_node in node_to_primary:
4498
          node_to_primary[inst.primary_node].add(inst.name)
4499
        for secnode in inst.secondary_nodes:
4500
          if secnode in node_to_secondary:
4501
            node_to_secondary[secnode].add(inst.name)
4502
    else:
4503
      node_to_primary = None
4504
      node_to_secondary = None
4505

    
4506
    if query.NQ_OOB in self.requested_data:
4507
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4508
                         for name, node in all_info.iteritems())
4509
    else:
4510
      oob_support = None
4511

    
4512
    if query.NQ_GROUP in self.requested_data:
4513
      groups = lu.cfg.GetAllNodeGroupsInfo()
4514
    else:
4515
      groups = {}
4516

    
4517
    return query.NodeQueryData([all_info[name] for name in nodenames],
4518
                               live_data, lu.cfg.GetMasterNode(),
4519
                               node_to_primary, node_to_secondary, groups,
4520
                               oob_support, lu.cfg.GetClusterInfo())
4521

    
4522

    
4523
class LUNodeQuery(NoHooksLU):
4524
  """Logical unit for querying nodes.
4525

4526
  """
4527
  # pylint: disable=W0142
4528
  REQ_BGL = False
4529

    
4530
  def CheckArguments(self):
4531
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4532
                         self.op.output_fields, self.op.use_locking)
4533

    
4534
  def ExpandNames(self):
4535
    self.nq.ExpandNames(self)
4536

    
4537
  def Exec(self, feedback_fn):
4538
    return self.nq.OldStyleQuery(self)
4539

    
4540

    
4541
class LUNodeQueryvols(NoHooksLU):
4542
  """Logical unit for getting volumes on node(s).
4543

4544
  """
4545
  REQ_BGL = False
4546
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4547
  _FIELDS_STATIC = utils.FieldSet("node")
4548

    
4549
  def CheckArguments(self):
4550
    _CheckOutputFields(static=self._FIELDS_STATIC,
4551
                       dynamic=self._FIELDS_DYNAMIC,
4552
                       selected=self.op.output_fields)
4553

    
4554
  def ExpandNames(self):
4555
    self.needed_locks = {}
4556
    self.share_locks[locking.LEVEL_NODE] = 1
4557
    if not self.op.nodes:
4558
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4559
    else:
4560
      self.needed_locks[locking.LEVEL_NODE] = \
4561
        _GetWantedNodes(self, self.op.nodes)
4562

    
4563
  def Exec(self, feedback_fn):
4564
    """Computes the list of nodes and their attributes.
4565

4566
    """
4567
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4568
    volumes = self.rpc.call_node_volumes(nodenames)
4569

    
4570
    ilist = self.cfg.GetAllInstancesInfo()
4571
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4572

    
4573
    output = []
4574
    for node in nodenames:
4575
      nresult = volumes[node]
4576
      if nresult.offline:
4577
        continue
4578
      msg = nresult.fail_msg
4579
      if msg:
4580
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4581
        continue
4582

    
4583
      node_vols = sorted(nresult.payload,
4584
                         key=operator.itemgetter("dev"))
4585

    
4586
      for vol in node_vols:
4587
        node_output = []
4588
        for field in self.op.output_fields:
4589
          if field == "node":
4590
            val = node
4591
          elif field == "phys":
4592
            val = vol["dev"]
4593
          elif field == "vg":
4594
            val = vol["vg"]
4595
          elif field == "name":
4596
            val = vol["name"]
4597
          elif field == "size":
4598
            val = int(float(vol["size"]))
4599
          elif field == "instance":
4600
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4601
          else:
4602
            raise errors.ParameterError(field)
4603
          node_output.append(str(val))
4604

    
4605
        output.append(node_output)
4606

    
4607
    return output
4608

    
4609

    
4610
class LUNodeQueryStorage(NoHooksLU):
4611
  """Logical unit for getting information on storage units on node(s).
4612

4613
  """
4614
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4615
  REQ_BGL = False
4616

    
4617
  def CheckArguments(self):
4618
    _CheckOutputFields(static=self._FIELDS_STATIC,
4619
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4620
                       selected=self.op.output_fields)
4621

    
4622
  def ExpandNames(self):
4623
    self.needed_locks = {}
4624
    self.share_locks[locking.LEVEL_NODE] = 1
4625

    
4626
    if self.op.nodes:
4627
      self.needed_locks[locking.LEVEL_NODE] = \
4628
        _GetWantedNodes(self, self.op.nodes)
4629
    else:
4630
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4631

    
4632
  def Exec(self, feedback_fn):
4633
    """Computes the list of nodes and their attributes.
4634

4635
    """
4636
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4637

    
4638
    # Always get name to sort by
4639
    if constants.SF_NAME in self.op.output_fields:
4640
      fields = self.op.output_fields[:]
4641
    else:
4642
      fields = [constants.SF_NAME] + self.op.output_fields
4643

    
4644
    # Never ask for node or type as it's only known to the LU
4645
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4646
      while extra in fields:
4647
        fields.remove(extra)
4648

    
4649
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4650
    name_idx = field_idx[constants.SF_NAME]
4651

    
4652
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4653
    data = self.rpc.call_storage_list(self.nodes,
4654
                                      self.op.storage_type, st_args,
4655
                                      self.op.name, fields)
4656

    
4657
    result = []
4658

    
4659
    for node in utils.NiceSort(self.nodes):
4660
      nresult = data[node]
4661
      if nresult.offline:
4662
        continue
4663

    
4664
      msg = nresult.fail_msg
4665
      if msg:
4666
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4667
        continue
4668

    
4669
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4670

    
4671
      for name in utils.NiceSort(rows.keys()):
4672
        row = rows[name]
4673

    
4674
        out = []
4675

    
4676
        for field in self.op.output_fields:
4677
          if field == constants.SF_NODE:
4678
            val = node
4679
          elif field == constants.SF_TYPE:
4680
            val = self.op.storage_type
4681
          elif field in field_idx:
4682
            val = row[field_idx[field]]
4683
          else:
4684
            raise errors.ParameterError(field)
4685

    
4686
          out.append(val)
4687

    
4688
        result.append(out)
4689

    
4690
    return result
4691

    
4692

    
4693
class _InstanceQuery(_QueryBase):
4694
  FIELDS = query.INSTANCE_FIELDS
4695

    
4696
  def ExpandNames(self, lu):
4697
    lu.needed_locks = {}
4698
    lu.share_locks = _ShareAll()
4699

    
4700
    if self.names:
4701
      self.wanted = _GetWantedInstances(lu, self.names)
4702
    else:
4703
      self.wanted = locking.ALL_SET
4704

    
4705
    self.do_locking = (self.use_locking and
4706
                       query.IQ_LIVE in self.requested_data)
4707
    if self.do_locking:
4708
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4709
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4710
      lu.needed_locks[locking.LEVEL_NODE] = []
4711
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4712

    
4713
    self.do_grouplocks = (self.do_locking and
4714
                          query.IQ_NODES in self.requested_data)
4715

    
4716
  def DeclareLocks(self, lu, level):
4717
    if self.do_locking:
4718
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4719
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4720

    
4721
        # Lock all groups used by instances optimistically; this requires going
4722
        # via the node before it's locked, requiring verification later on
4723
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4724
          set(group_uuid
4725
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4726
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4727
      elif level == locking.LEVEL_NODE:
4728
        lu._LockInstancesNodes() # pylint: disable=W0212
4729

    
4730
  @staticmethod
4731
  def _CheckGroupLocks(lu):
4732
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4733
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4734

    
4735
    # Check if node groups for locked instances are still correct
4736
    for instance_name in owned_instances:
4737
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4738

    
4739
  def _GetQueryData(self, lu):
4740
    """Computes the list of instances and their attributes.
4741

4742
    """
4743
    if self.do_grouplocks:
4744
      self._CheckGroupLocks(lu)
4745

    
4746
    cluster = lu.cfg.GetClusterInfo()
4747
    all_info = lu.cfg.GetAllInstancesInfo()
4748

    
4749
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4750

    
4751
    instance_list = [all_info[name] for name in instance_names]
4752
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4753
                                        for inst in instance_list)))
4754
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4755
    bad_nodes = []
4756
    offline_nodes = []
4757
    wrongnode_inst = set()
4758

    
4759
    # Gather data as requested
4760
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4761
      live_data = {}
4762
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4763
      for name in nodes:
4764
        result = node_data[name]
4765
        if result.offline:
4766
          # offline nodes will be in both lists
4767
          assert result.fail_msg
4768
          offline_nodes.append(name)
4769
        if result.fail_msg:
4770
          bad_nodes.append(name)
4771
        elif result.payload:
4772
          for inst in result.payload:
4773
            if inst in all_info:
4774
              if all_info[inst].primary_node == name:
4775
                live_data.update(result.payload)
4776
              else:
4777
                wrongnode_inst.add(inst)
4778
            else:
4779
              # orphan instance; we don't list it here as we don't
4780
              # handle this case yet in the output of instance listing
4781
              logging.warning("Orphan instance '%s' found on node %s",
4782
                              inst, name)
4783
        # else no instance is alive
4784
    else:
4785
      live_data = {}
4786

    
4787
    if query.IQ_DISKUSAGE in self.requested_data:
4788
      disk_usage = dict((inst.name,
4789
                         _ComputeDiskSize(inst.disk_template,
4790
                                          [{constants.IDISK_SIZE: disk.size}
4791
                                           for disk in inst.disks]))
4792
                        for inst in instance_list)
4793
    else:
4794
      disk_usage = None
4795

    
4796
    if query.IQ_CONSOLE in self.requested_data:
4797
      consinfo = {}
4798
      for inst in instance_list:
4799
        if inst.name in live_data:
4800
          # Instance is running
4801
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4802
        else:
4803
          consinfo[inst.name] = None
4804
      assert set(consinfo.keys()) == set(instance_names)
4805
    else:
4806
      consinfo = None
4807

    
4808
    if query.IQ_NODES in self.requested_data:
4809
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4810
                                            instance_list)))
4811
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4812
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4813
                    for uuid in set(map(operator.attrgetter("group"),
4814
                                        nodes.values())))
4815
    else:
4816
      nodes = None
4817
      groups = None
4818

    
4819
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4820
                                   disk_usage, offline_nodes, bad_nodes,
4821
                                   live_data, wrongnode_inst, consinfo,
4822
                                   nodes, groups)
4823

    
4824

    
4825
class LUQuery(NoHooksLU):
4826
  """Query for resources/items of a certain kind.
4827

4828
  """
4829
  # pylint: disable=W0142
4830
  REQ_BGL = False
4831

    
4832
  def CheckArguments(self):
4833
    qcls = _GetQueryImplementation(self.op.what)
4834

    
4835
    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4836

    
4837
  def ExpandNames(self):
4838
    self.impl.ExpandNames(self)
4839

    
4840
  def DeclareLocks(self, level):
4841
    self.impl.DeclareLocks(self, level)
4842

    
4843
  def Exec(self, feedback_fn):
4844
    return self.impl.NewStyleQuery(self)
4845

    
4846

    
4847
class LUQueryFields(NoHooksLU):
4848
  """Query for resources/items of a certain kind.
4849

4850
  """
4851
  # pylint: disable=W0142
4852
  REQ_BGL = False
4853

    
4854
  def CheckArguments(self):
4855
    self.qcls = _GetQueryImplementation(self.op.what)
4856

    
4857
  def ExpandNames(self):
4858
    self.needed_locks = {}
4859

    
4860
  def Exec(self, feedback_fn):
4861
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4862

    
4863

    
4864
class LUNodeModifyStorage(NoHooksLU):
4865
  """Logical unit for modifying a storage volume on a node.
4866

4867
  """
4868
  REQ_BGL = False
4869

    
4870
  def CheckArguments(self):
4871
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4872

    
4873
    storage_type = self.op.storage_type
4874

    
4875
    try:
4876
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4877
    except KeyError:
4878
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4879
                                 " modified" % storage_type,
4880
                                 errors.ECODE_INVAL)
4881

    
4882
    diff = set(self.op.changes.keys()) - modifiable
4883
    if diff:
4884
      raise errors.OpPrereqError("The following fields can not be modified for"
4885
                                 " storage units of type '%s': %r" %
4886
                                 (storage_type, list(diff)),
4887
                                 errors.ECODE_INVAL)
4888

    
4889
  def ExpandNames(self):
4890
    self.needed_locks = {
4891
      locking.LEVEL_NODE: self.op.node_name,
4892
      }
4893

    
4894
  def Exec(self, feedback_fn):
4895
    """Computes the list of nodes and their attributes.
4896

4897
    """
4898
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4899
    result = self.rpc.call_storage_modify(self.op.node_name,
4900
                                          self.op.storage_type, st_args,
4901
                                          self.op.name, self.op.changes)
4902
    result.Raise("Failed to modify storage unit '%s' on %s" %
4903
                 (self.op.name, self.op.node_name))
4904

    
4905

    
4906
class LUNodeAdd(LogicalUnit):
4907
  """Logical unit for adding node to the cluster.
4908

4909
  """
4910
  HPATH = "node-add"
4911
  HTYPE = constants.HTYPE_NODE
4912
  _NFLAGS = ["master_capable", "vm_capable"]
4913

    
4914
  def CheckArguments(self):
4915
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4916
    # validate/normalize the node name
4917
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4918
                                         family=self.primary_ip_family)
4919
    self.op.node_name = self.hostname.name
4920

    
4921
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4922
      raise errors.OpPrereqError("Cannot readd the master node",
4923
                                 errors.ECODE_STATE)
4924

    
4925
    if self.op.readd and self.op.group:
4926
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4927
                                 " being readded", errors.ECODE_INVAL)
4928

    
4929
  def BuildHooksEnv(self):
4930
    """Build hooks env.
4931

4932
    This will run on all nodes before, and on all nodes + the new node after.
4933

4934
    """
4935
    return {
4936
      "OP_TARGET": self.op.node_name,
4937
      "NODE_NAME": self.op.node_name,
4938
      "NODE_PIP": self.op.primary_ip,
4939
      "NODE_SIP": self.op.secondary_ip,
4940
      "MASTER_CAPABLE": str(self.op.master_capable),
4941
      "VM_CAPABLE": str(self.op.vm_capable),
4942
      }
4943

    
4944
  def BuildHooksNodes(self):
4945
    """Build hooks nodes.
4946

4947
    """
4948
    # Exclude added node
4949
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4950
    post_nodes = pre_nodes + [self.op.node_name, ]
4951

    
4952
    return (pre_nodes, post_nodes)
4953

    
4954
  def CheckPrereq(self):
4955
    """Check prerequisites.
4956

4957
    This checks:
4958
     - the new node is not already in the config
4959
     - it is resolvable
4960
     - its parameters (single/dual homed) matches the cluster
4961

4962
    Any errors are signaled by raising errors.OpPrereqError.
4963

4964
    """
4965
    cfg = self.cfg
4966
    hostname = self.hostname
4967
    node = hostname.name
4968
    primary_ip = self.op.primary_ip = hostname.ip
4969
    if self.op.secondary_ip is None:
4970
      if self.primary_ip_family == netutils.IP6Address.family:
4971
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4972
                                   " IPv4 address must be given as secondary",
4973
                                   errors.ECODE_INVAL)
4974
      self.op.secondary_ip = primary_ip
4975

    
4976
    secondary_ip = self.op.secondary_ip
4977
    if not netutils.IP4Address.IsValid(secondary_ip):
4978
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4979
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4980

    
4981
    node_list = cfg.GetNodeList()
4982
    if not self.op.readd and node in node_list:
4983
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4984
                                 node, errors.ECODE_EXISTS)
4985
    elif self.op.readd and node not in node_list:
4986
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4987
                                 errors.ECODE_NOENT)
4988

    
4989
    self.changed_primary_ip = False
4990

    
4991
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4992
      if self.op.readd and node == existing_node_name:
4993
        if existing_node.secondary_ip != secondary_ip:
4994
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4995
                                     " address configuration as before",
4996
                                     errors.ECODE_INVAL)
4997
        if existing_node.primary_ip != primary_ip:
4998
          self.changed_primary_ip = True
4999

    
5000
        continue
5001

    
5002
      if (existing_node.primary_ip == primary_ip or
5003
          existing_node.secondary_ip == primary_ip or
5004
          existing_node.primary_ip == secondary_ip or
5005
          existing_node.secondary_ip == secondary_ip):
5006
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5007
                                   " existing node %s" % existing_node.name,
5008
                                   errors.ECODE_NOTUNIQUE)
5009

    
5010
    # After this 'if' block, None is no longer a valid value for the
5011
    # _capable op attributes
5012
    if self.op.readd:
5013
      old_node = self.cfg.GetNodeInfo(node)
5014
      assert old_node is not None, "Can't retrieve locked node %s" % node
5015
      for attr in self._NFLAGS:
5016
        if getattr(self.op, attr) is None:
5017
          setattr(self.op, attr, getattr(old_node, attr))
5018
    else:
5019
      for attr in self._NFLAGS:
5020
        if getattr(self.op, attr) is None:
5021
          setattr(self.op, attr, True)
5022

    
5023
    if self.op.readd and not self.op.vm_capable:
5024
      pri, sec = cfg.GetNodeInstances(node)
5025
      if pri or sec:
5026
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5027
                                   " flag set to false, but it already holds"
5028
                                   " instances" % node,
5029
                                   errors.ECODE_STATE)
5030

    
5031
    # check that the type of the node (single versus dual homed) is the
5032
    # same as for the master
5033
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5034
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5035
    newbie_singlehomed = secondary_ip == primary_ip
5036
    if master_singlehomed != newbie_singlehomed:
5037
      if master_singlehomed:
5038
        raise errors.OpPrereqError("The master has no secondary ip but the"
5039
                                   " new node has one",
5040
                                   errors.ECODE_INVAL)
5041
      else:
5042
        raise errors.OpPrereqError("The master has a secondary ip but the"
5043
                                   " new node doesn't have one",
5044
                                   errors.ECODE_INVAL)
5045

    
5046
    # checks reachability
5047
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5048
      raise errors.OpPrereqError("Node not reachable by ping",
5049
                                 errors.ECODE_ENVIRON)
5050

    
5051
    if not newbie_singlehomed:
5052
      # check reachability from my secondary ip to newbie's secondary ip
5053
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5054
                           source=myself.secondary_ip):
5055
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5056
                                   " based ping to node daemon port",
5057
                                   errors.ECODE_ENVIRON)
5058

    
5059
    if self.op.readd:
5060
      exceptions = [node]
5061
    else:
5062
      exceptions = []
5063

    
5064
    if self.op.master_capable:
5065
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5066
    else:
5067
      self.master_candidate = False
5068

    
5069
    if self.op.readd:
5070
      self.new_node = old_node
5071
    else:
5072
      node_group = cfg.LookupNodeGroup(self.op.group)
5073
      self.new_node = objects.Node(name=node,
5074
                                   primary_ip=primary_ip,
5075
                                   secondary_ip=secondary_ip,
5076
                                   master_candidate=self.master_candidate,
5077
                                   offline=False, drained=False,
5078
                                   group=node_group)
5079

    
5080
    if self.op.ndparams:
5081
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5082

    
5083
  def Exec(self, feedback_fn):
5084
    """Adds the new node to the cluster.
5085

5086
    """
5087
    new_node = self.new_node
5088
    node = new_node.name
5089

    
5090
    # We adding a new node so we assume it's powered
5091
    new_node.powered = True
5092

    
5093
    # for re-adds, reset the offline/drained/master-candidate flags;
5094
    # we need to reset here, otherwise offline would prevent RPC calls
5095
    # later in the procedure; this also means that if the re-add
5096
    # fails, we are left with a non-offlined, broken node
5097
    if self.op.readd:
5098
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5099
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5100
      # if we demote the node, we do cleanup later in the procedure
5101
      new_node.master_candidate = self.master_candidate
5102
      if self.changed_primary_ip:
5103
        new_node.primary_ip = self.op.primary_ip
5104

    
5105
    # copy the master/vm_capable flags
5106
    for attr in self._NFLAGS:
5107
      setattr(new_node, attr, getattr(self.op, attr))
5108

    
5109
    # notify the user about any possible mc promotion
5110
    if new_node.master_candidate:
5111
      self.LogInfo("Node will be a master candidate")
5112

    
5113
    if self.op.ndparams:
5114
      new_node.ndparams = self.op.ndparams
5115
    else:
5116
      new_node.ndparams = {}
5117

    
5118
    # check connectivity
5119
    result = self.rpc.call_version([node])[node]
5120
    result.Raise("Can't get version information from node %s" % node)
5121
    if constants.PROTOCOL_VERSION == result.payload:
5122
      logging.info("Communication to node %s fine, sw version %s match",
5123
                   node, result.payload)
5124
    else:
5125
      raise errors.OpExecError("Version mismatch master version %s,"
5126
                               " node version %s" %
5127
                               (constants.PROTOCOL_VERSION, result.payload))
5128

    
5129
    # Add node to our /etc/hosts, and add key to known_hosts
5130
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5131
      master_node = self.cfg.GetMasterNode()
5132
      result = self.rpc.call_etc_hosts_modify(master_node,
5133
                                              constants.ETC_HOSTS_ADD,
5134
                                              self.hostname.name,
5135
                                              self.hostname.ip)
5136
      result.Raise("Can't update hosts file with new host data")
5137

    
5138
    if new_node.secondary_ip != new_node.primary_ip:
5139
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5140
                               False)
5141

    
5142
    node_verify_list = [self.cfg.GetMasterNode()]
5143
    node_verify_param = {
5144
      constants.NV_NODELIST: ([node], {}),
5145
      # TODO: do a node-net-test as well?
5146
    }
5147

    
5148
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5149
                                       self.cfg.GetClusterName())
5150
    for verifier in node_verify_list:
5151
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5152
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5153
      if nl_payload:
5154
        for failed in nl_payload:
5155
          feedback_fn("ssh/hostname verification failed"
5156
                      " (checking from %s): %s" %
5157
                      (verifier, nl_payload[failed]))
5158
        raise errors.OpExecError("ssh/hostname verification failed")
5159

    
5160
    if self.op.readd:
5161
      _RedistributeAncillaryFiles(self)
5162
      self.context.ReaddNode(new_node)
5163
      # make sure we redistribute the config
5164
      self.cfg.Update(new_node, feedback_fn)
5165
      # and make sure the new node will not have old files around
5166
      if not new_node.master_candidate:
5167
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5168
        msg = result.fail_msg
5169
        if msg:
5170
          self.LogWarning("Node failed to demote itself from master"
5171
                          " candidate status: %s" % msg)
5172
    else:
5173
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5174
                                  additional_vm=self.op.vm_capable)
5175
      self.context.AddNode(new_node, self.proc.GetECId())
5176

    
5177

    
5178
class LUNodeSetParams(LogicalUnit):
5179
  """Modifies the parameters of a node.
5180

5181
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5182
      to the node role (as _ROLE_*)
5183
  @cvar _R2F: a dictionary from node role to tuples of flags
5184
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5185

5186
  """
5187
  HPATH = "node-modify"
5188
  HTYPE = constants.HTYPE_NODE
5189
  REQ_BGL = False
5190
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5191
  _F2R = {
5192
    (True, False, False): _ROLE_CANDIDATE,
5193
    (False, True, False): _ROLE_DRAINED,
5194
    (False, False, True): _ROLE_OFFLINE,
5195
    (False, False, False): _ROLE_REGULAR,
5196
    }
5197
  _R2F = dict((v, k) for k, v in _F2R.items())
5198
  _FLAGS = ["master_candidate", "drained", "offline"]
5199

    
5200
  def CheckArguments(self):
5201
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5202
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5203
                self.op.master_capable, self.op.vm_capable,
5204
                self.op.secondary_ip, self.op.ndparams]
5205
    if all_mods.count(None) == len(all_mods):
5206
      raise errors.OpPrereqError("Please pass at least one modification",
5207
                                 errors.ECODE_INVAL)
5208
    if all_mods.count(True) > 1:
5209
      raise errors.OpPrereqError("Can't set the node into more than one"
5210
                                 " state at the same time",
5211
                                 errors.ECODE_INVAL)
5212

    
5213
    # Boolean value that tells us whether we might be demoting from MC
5214
    self.might_demote = (self.op.master_candidate == False or
5215
                         self.op.offline == True or
5216
                         self.op.drained == True or
5217
                         self.op.master_capable == False)
5218

    
5219
    if self.op.secondary_ip:
5220
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5221
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5222
                                   " address" % self.op.secondary_ip,
5223
                                   errors.ECODE_INVAL)
5224

    
5225
    self.lock_all = self.op.auto_promote and self.might_demote
5226
    self.lock_instances = self.op.secondary_ip is not None
5227

    
5228
  def ExpandNames(self):
5229
    if self.lock_all:
5230
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5231
    else:
5232
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5233

    
5234
    if self.lock_instances:
5235
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5236

    
5237
  def DeclareLocks(self, level):
5238
    # If we have locked all instances, before waiting to lock nodes, release
5239
    # all the ones living on nodes unrelated to the current operation.
5240
    if level == locking.LEVEL_NODE and self.lock_instances:
5241
      self.affected_instances = []
5242
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5243
        instances_keep = []
5244

    
5245
        # Build list of instances to release
5246
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5247
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5248
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5249
              self.op.node_name in instance.all_nodes):
5250
            instances_keep.append(instance_name)
5251
            self.affected_instances.append(instance)
5252

    
5253
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5254

    
5255
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5256
                set(instances_keep))
5257

    
5258
  def BuildHooksEnv(self):
5259
    """Build hooks env.
5260

5261
    This runs on the master node.
5262

5263
    """
5264
    return {
5265
      "OP_TARGET": self.op.node_name,
5266
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5267
      "OFFLINE": str(self.op.offline),
5268
      "DRAINED": str(self.op.drained),
5269
      "MASTER_CAPABLE": str(self.op.master_capable),
5270
      "VM_CAPABLE": str(self.op.vm_capable),
5271
      }
5272

    
5273
  def BuildHooksNodes(self):
5274
    """Build hooks nodes.
5275

5276
    """
5277
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5278
    return (nl, nl)
5279

    
5280
  def CheckPrereq(self):
5281
    """Check prerequisites.
5282

5283
    This only checks the instance list against the existing names.
5284

5285
    """
5286
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5287

    
5288
    if (self.op.master_candidate is not None or
5289
        self.op.drained is not None or
5290
        self.op.offline is not None):
5291
      # we can't change the master's node flags
5292
      if self.op.node_name == self.cfg.GetMasterNode():
5293
        raise errors.OpPrereqError("The master role can be changed"
5294
                                   " only via master-failover",
5295
                                   errors.ECODE_INVAL)
5296

    
5297
    if self.op.master_candidate and not node.master_capable:
5298
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5299
                                 " it a master candidate" % node.name,
5300
                                 errors.ECODE_STATE)
5301

    
5302
    if self.op.vm_capable == False:
5303
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5304
      if ipri or isec:
5305
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5306
                                   " the vm_capable flag" % node.name,
5307
                                   errors.ECODE_STATE)
5308

    
5309
    if node.master_candidate and self.might_demote and not self.lock_all:
5310
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5311
      # check if after removing the current node, we're missing master
5312
      # candidates
5313
      (mc_remaining, mc_should, _) = \
5314
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5315
      if mc_remaining < mc_should:
5316
        raise errors.OpPrereqError("Not enough master candidates, please"
5317
                                   " pass auto promote option to allow"
5318
                                   " promotion", errors.ECODE_STATE)
5319

    
5320
    self.old_flags = old_flags = (node.master_candidate,
5321
                                  node.drained, node.offline)
5322
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5323
    self.old_role = old_role = self._F2R[old_flags]
5324

    
5325
    # Check for ineffective changes
5326
    for attr in self._FLAGS:
5327
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5328
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5329
        setattr(self.op, attr, None)
5330

    
5331
    # Past this point, any flag change to False means a transition
5332
    # away from the respective state, as only real changes are kept
5333

    
5334
    # TODO: We might query the real power state if it supports OOB
5335
    if _SupportsOob(self.cfg, node):
5336
      if self.op.offline is False and not (node.powered or
5337
                                           self.op.powered == True):
5338
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5339
                                    " offline status can be reset") %
5340
                                   self.op.node_name)
5341
    elif self.op.powered is not None:
5342
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5343
                                  " as it does not support out-of-band"
5344
                                  " handling") % self.op.node_name)
5345

    
5346
    # If we're being deofflined/drained, we'll MC ourself if needed
5347
    if (self.op.drained == False or self.op.offline == False or
5348
        (self.op.master_capable and not node.master_capable)):
5349
      if _DecideSelfPromotion(self):
5350
        self.op.master_candidate = True
5351
        self.LogInfo("Auto-promoting node to master candidate")
5352

    
5353
    # If we're no longer master capable, we'll demote ourselves from MC
5354
    if self.op.master_capable == False and node.master_candidate:
5355
      self.LogInfo("Demoting from master candidate")
5356
      self.op.master_candidate = False
5357

    
5358
    # Compute new role
5359
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5360
    if self.op.master_candidate:
5361
      new_role = self._ROLE_CANDIDATE
5362
    elif self.op.drained:
5363
      new_role = self._ROLE_DRAINED
5364
    elif self.op.offline:
5365
      new_role = self._ROLE_OFFLINE
5366
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5367
      # False is still in new flags, which means we're un-setting (the
5368
      # only) True flag
5369
      new_role = self._ROLE_REGULAR
5370
    else: # no new flags, nothing, keep old role
5371
      new_role = old_role
5372

    
5373
    self.new_role = new_role
5374

    
5375
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5376
      # Trying to transition out of offline status
5377
      result = self.rpc.call_version([node.name])[node.name]
5378
      if result.fail_msg:
5379
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5380
                                   " to report its version: %s" %
5381
                                   (node.name, result.fail_msg),
5382
                                   errors.ECODE_STATE)
5383
      else:
5384
        self.LogWarning("Transitioning node from offline to online state"
5385
                        " without using re-add. Please make sure the node"
5386
                        " is healthy!")
5387

    
5388
    if self.op.secondary_ip:
5389
      # Ok even without locking, because this can't be changed by any LU
5390
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5391
      master_singlehomed = master.secondary_ip == master.primary_ip
5392
      if master_singlehomed and self.op.secondary_ip:
5393
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5394
                                   " homed cluster", errors.ECODE_INVAL)
5395

    
5396
      if node.offline:
5397
        if self.affected_instances:
5398
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5399
                                     " node has instances (%s) configured"
5400
                                     " to use it" % self.affected_instances)
5401
      else:
5402
        # On online nodes, check that no instances are running, and that
5403
        # the node has the new ip and we can reach it.
5404
        for instance in self.affected_instances:
5405
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5406

    
5407
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5408
        if master.name != node.name:
5409
          # check reachability from master secondary ip to new secondary ip
5410
          if not netutils.TcpPing(self.op.secondary_ip,
5411
                                  constants.DEFAULT_NODED_PORT,
5412
                                  source=master.secondary_ip):
5413
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5414
                                       " based ping to node daemon port",
5415
                                       errors.ECODE_ENVIRON)
5416

    
5417
    if self.op.ndparams:
5418
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5419
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5420
      self.new_ndparams = new_ndparams
5421

    
5422
  def Exec(self, feedback_fn):
5423
    """Modifies a node.
5424

5425
    """
5426
    node = self.node
5427
    old_role = self.old_role
5428
    new_role = self.new_role
5429

    
5430
    result = []
5431

    
5432
    if self.op.ndparams:
5433
      node.ndparams = self.new_ndparams
5434

    
5435
    if self.op.powered is not None:
5436
      node.powered = self.op.powered
5437

    
5438
    for attr in ["master_capable", "vm_capable"]:
5439
      val = getattr(self.op, attr)
5440
      if val is not None:
5441
        setattr(node, attr, val)
5442
        result.append((attr, str(val)))
5443

    
5444
    if new_role != old_role:
5445
      # Tell the node to demote itself, if no longer MC and not offline
5446
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5447
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5448
        if msg:
5449
          self.LogWarning("Node failed to demote itself: %s", msg)
5450

    
5451
      new_flags = self._R2F[new_role]
5452
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5453
        if of != nf:
5454
          result.append((desc, str(nf)))
5455
      (node.master_candidate, node.drained, node.offline) = new_flags
5456

    
5457
      # we locked all nodes, we adjust the CP before updating this node
5458
      if self.lock_all:
5459
        _AdjustCandidatePool(self, [node.name])
5460

    
5461
    if self.op.secondary_ip:
5462
      node.secondary_ip = self.op.secondary_ip
5463
      result.append(("secondary_ip", self.op.secondary_ip))
5464

    
5465
    # this will trigger configuration file update, if needed
5466
    self.cfg.Update(node, feedback_fn)
5467

    
5468
    # this will trigger job queue propagation or cleanup if the mc
5469
    # flag changed
5470
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5471
      self.context.ReaddNode(node)
5472

    
5473
    return result
5474

    
5475

    
5476
class LUNodePowercycle(NoHooksLU):
5477
  """Powercycles a node.
5478

5479
  """
5480
  REQ_BGL = False
5481

    
5482
  def CheckArguments(self):
5483
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5484
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5485
      raise errors.OpPrereqError("The node is the master and the force"
5486
                                 " parameter was not set",
5487
                                 errors.ECODE_INVAL)
5488

    
5489
  def ExpandNames(self):
5490
    """Locking for PowercycleNode.
5491

5492
    This is a last-resort option and shouldn't block on other
5493
    jobs. Therefore, we grab no locks.
5494

5495
    """
5496
    self.needed_locks = {}
5497

    
5498
  def Exec(self, feedback_fn):
5499
    """Reboots a node.
5500

5501
    """
5502
    result = self.rpc.call_node_powercycle(self.op.node_name,
5503
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": runtime.GetArchInfo(),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); C{device_info} is a list of
      (host, instance_visible_name, device_path) tuples with the mapping
      from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
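# A typical call pattern for the helper above (mirroring
# LUInstanceActivateDisks.Exec earlier in this module): callers check the
# boolean first and only rely on the (node, iv_name, device_path) tuples when
# assembly succeeded on all nodes:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")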


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks
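# Illustrative use of the helper above: passing disks=None acts on all of the
# instance's disks, while an explicit selection must consist of objects taken
# from instance.disks itself, e.g. (hypothetical caller):
#
#   selected = _ExpandCheckDisks(instance, instance.disks[:1])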


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are only ignored if C{ignore_primary} is
  true; errors reported by offline secondary nodes are always ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result
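# Sketch of how the shutdown helpers above are typically combined (based on
# LUInstanceDeactivateDisks.Exec earlier in this module): the "safe" variant
# refuses to tear down disks under a running instance, the raw variant does
# not perform that check:
#
#   if force:
#     _ShutdownInstanceDisks(lu, instance)
#   else:
#     _SafeShutdownInstanceDisks(lu, instance)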


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
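# Example invocation of the memory check above, taken from the startup path
# further below (LUInstanceStartup.CheckPrereq):
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)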


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)
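# The per-VG check above expects req_sizes to map volume group names to the
# required space in MiB; a sketch with hypothetical node and VG names:
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1.example.com"],
#                            {"xenvg": 10240, "datavg": 2048})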


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
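# Note on the reboot logic above: soft and hard reboots of a running instance
# are delegated to the hypervisor via call_instance_reboot, while a full
# reboot (or rebooting an already stopped instance) is implemented as an
# explicit shutdown / disk restart / start sequence; in all cases the
# instance is marked as up in the configuration afterwards.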


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)
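# Note on the disk re-creation above: for DRBD8 disks the logical_id is the
# 6-tuple (node_a, node_b, port, minor_a, minor_b, secret); only the node
# names and freshly allocated minors are replaced when moving to new nodes,
# while the DRBD port and shared secret are preserved from the old disk.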


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
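# Note on the rename flow above: the configuration is renamed first and the
# instance lock is swapped while holding the BGL; failures that happen later
# (file storage directory rename, OS rename script) are therefore only
# reported, with a reminder that the instance has already been renamed in
# Ganeti.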


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
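# Summary of the move sequence implemented above: shut the instance down on
# the source node, create empty disks on the target node, copy each disk
# with call_blockdev_export, switch primary_node in the configuration,
# remove the old disks and finally restart the instance if it was marked
# as up.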
7012

    
7013

    
7014
class LUNodeMigrate(LogicalUnit):
7015
  """Migrate all instances from a node.
7016

7017
  """
7018
  HPATH = "node-migrate"
7019
  HTYPE = constants.HTYPE_NODE
7020
  REQ_BGL = False
7021

    
7022
  def CheckArguments(self):
7023
    pass
7024

    
7025
  def ExpandNames(self):
7026
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7027

    
7028
    self.share_locks = _ShareAll()
7029
    self.needed_locks = {
7030
      locking.LEVEL_NODE: [self.op.node_name],
7031
      }
7032

    
7033
  def BuildHooksEnv(self):
7034
    """Build hooks env.
7035

7036
    This runs on the master, the primary and all the secondaries.
7037

7038
    """
7039
    return {
7040
      "NODE_NAME": self.op.node_name,
7041
      }
7042

    
7043
  def BuildHooksNodes(self):
7044
    """Build hooks nodes.
7045

7046
    """
7047
    nl = [self.cfg.GetMasterNode()]
7048
    return (nl, nl)
7049

    
7050
  def CheckPrereq(self):
7051
    pass
7052

    
7053
  def Exec(self, feedback_fn):
7054
    # Prepare jobs for migration instances
7055
    jobs = [
7056
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7057
                                 mode=self.op.mode,
7058
                                 live=self.op.live,
7059
                                 iallocator=self.op.iallocator,
7060
                                 target_node=self.op.target_node)]
7061
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7062
      ]
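    # The result is a list of single-opcode jobs; e.g. (illustrative) a node
    # with primary instances "inst1" and "inst2" would yield
    # [[OpInstanceMigrate(inst1, ...)], [OpInstanceMigrate(inst2, ...)]].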
7063

    
7064
    # TODO: Run iallocator in this opcode and pass correct placement options to
7065
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7066
    # running the iallocator and the actual migration, a good consistency model
7067
    # will have to be found.
7068

    
7069
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7070
            frozenset([self.op.node_name]))
7071

    
7072
    return ResultWithJobs(jobs)
7073

    
7074

    
7075
class TLMigrateInstance(Tasklet):
7076
  """Tasklet class for instance migration.
7077

7078
  @type live: boolean
7079
  @ivar live: whether the migration will be done live or non-live;
7080
      this variable is initialized only after CheckPrereq has run
7081
  @type cleanup: boolean
7082
  @ivar cleanup: Whether we clean up from a failed migration
7083
  @type iallocator: string
7084
  @ivar iallocator: The iallocator used to determine target_node
7085
  @type target_node: string
7086
  @ivar target_node: If given, the target_node to reallocate the instance to
7087
  @type failover: boolean
7088
  @ivar failover: Whether operation results in failover or migration
7089
  @type fallback: boolean
7090
  @ivar fallback: Whether fallback to failover is allowed if migration not
7091
                  possible
7092
  @type ignore_consistency: boolean
7093
  @ivar ignore_consistency: Whether we should ignore consistency between source
7094
                            and target node
7095
  @type shutdown_timeout: int
7096
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7097

7098
  """
7099
  def __init__(self, lu, instance_name, cleanup=False,
7100
               failover=False, fallback=False,
7101
               ignore_consistency=False,
7102
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7103
    """Initializes this class.
7104

7105
    """
7106
    Tasklet.__init__(self, lu)
7107

    
7108
    # Parameters
7109
    self.instance_name = instance_name
7110
    self.cleanup = cleanup
7111
    self.live = False # will be overridden later
7112
    self.failover = failover
7113
    self.fallback = fallback
7114
    self.ignore_consistency = ignore_consistency
7115
    self.shutdown_timeout = shutdown_timeout
7116

    
7117
  def CheckPrereq(self):
7118
    """Check prerequisites.
7119

7120
    This checks that the instance is in the cluster.
7121

7122
    """
7123
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7124
    instance = self.cfg.GetInstanceInfo(instance_name)
7125
    assert instance is not None
7126
    self.instance = instance
7127

    
7128
    if (not self.cleanup and not instance.admin_up and not self.failover and
7129
        self.fallback):
7130
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7131
                      " to failover")
7132
      self.failover = True
7133

    
7134
    if instance.disk_template not in constants.DTS_MIRRORED:
7135
      if self.failover:
7136
        text = "failovers"
7137
      else:
7138
        text = "migrations"
7139
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7140
                                 " %s" % (instance.disk_template, text),
7141
                                 errors.ECODE_STATE)
7142

    
7143
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7144
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7145

    
7146
      if self.lu.op.iallocator:
7147
        self._RunAllocator()
7148
      else:
7149
        # We set self.target_node as it is required by
7150
        # BuildHooksEnv
7151
        self.target_node = self.lu.op.target_node
7152

    
7153
      # self.target_node is already populated, either directly or by the
7154
      # iallocator run
7155
      target_node = self.target_node
7156
      if self.target_node == instance.primary_node:
7157
        raise errors.OpPrereqError("Cannot migrate instance %s"
7158
                                   " to its primary (%s)" %
7159
                                   (instance.name, instance.primary_node))
7160

    
7161
      if len(self.lu.tasklets) == 1:
7162
        # It is safe to release locks only when we're the only tasklet
7163
        # in the LU
7164
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7165
                      keep=[instance.primary_node, self.target_node])
7166

    
7167
    else:
7168
      secondary_nodes = instance.secondary_nodes
7169
      if not secondary_nodes:
7170
        raise errors.ConfigurationError("No secondary node but using"
7171
                                        " %s disk template" %
7172
                                        instance.disk_template)
7173
      target_node = secondary_nodes[0]
7174
      if self.lu.op.iallocator or (self.lu.op.target_node and
7175
                                   self.lu.op.target_node != target_node):
7176
        if self.failover:
7177
          text = "failed over"
7178
        else:
7179
          text = "migrated"
7180
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7181
                                   " be %s to arbitrary nodes"
7182
                                   " (neither an iallocator nor a target"
7183
                                   " node can be passed)" %
7184
                                   (instance.disk_template, text),
7185
                                   errors.ECODE_INVAL)
7186

    
7187
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7188

    
7189
    # check memory requirements on the secondary node
7190
    if not self.cleanup and (not self.failover or instance.admin_up):
7191
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7192
                           instance.name, i_be[constants.BE_MEMORY],
7193
                           instance.hypervisor)
7194
    else:
7195
      self.lu.LogInfo("Not checking memory on the secondary node as"
7196
                      " instance will not be started")
7197

    
7198
    # check bridge existence
7199
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7200

    
7201
    if not self.cleanup:
7202
      _CheckNodeNotDrained(self.lu, target_node)
7203
      if not self.failover:
7204
        result = self.rpc.call_instance_migratable(instance.primary_node,
7205
                                                   instance)
7206
        if result.fail_msg and self.fallback:
7207
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7208
                          " failover")
7209
          self.failover = True
7210
        else:
7211
          result.Raise("Can't migrate, please use failover",
7212
                       prereq=True, ecode=errors.ECODE_STATE)
7213

    
7214
    assert not (self.failover and self.cleanup)
7215

    
7216
    if not self.failover:
7217
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7218
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7219
                                   " parameters are accepted",
7220
                                   errors.ECODE_INVAL)
7221
      if self.lu.op.live is not None:
7222
        if self.lu.op.live:
7223
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7224
        else:
7225
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7226
        # reset the 'live' parameter to None so that repeated
7227
        # invocations of CheckPrereq do not raise an exception
7228
        self.lu.op.live = None
7229
      elif self.lu.op.mode is None:
7230
        # read the default value from the hypervisor
7231
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7232
                                                skip_globals=False)
7233
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7234

    
7235
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7236
    else:
7237
      # Failover is never live
7238
      self.live = False
7239

    
7240
  def _RunAllocator(self):
7241
    """Run the allocator based on input opcode.
7242

7243
    """
7244
    ial = IAllocator(self.cfg, self.rpc,
7245
                     mode=constants.IALLOCATOR_MODE_RELOC,
7246
                     name=self.instance_name,
7247
                     # TODO See why hail breaks with a single node below
7248
                     relocate_from=[self.instance.primary_node,
7249
                                    self.instance.primary_node],
7250
                     )
7251

    
7252
    ial.Run(self.lu.op.iallocator)
7253

    
7254
    if not ial.success:
7255
      raise errors.OpPrereqError("Can't compute nodes using"
7256
                                 " iallocator '%s': %s" %
7257
                                 (self.lu.op.iallocator, ial.info),
7258
                                 errors.ECODE_NORES)
7259
    if len(ial.result) != ial.required_nodes:
7260
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7261
                                 " of nodes (%s), required %s" %
7262
                                 (self.lu.op.iallocator, len(ial.result),
7263
                                  ial.required_nodes), errors.ECODE_FAULT)
7264
    self.target_node = ial.result[0]
7265
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7266
                 self.instance_name, self.lu.op.iallocator,
7267
                 utils.CommaJoin(ial.result))
7268

    
7269
  def _WaitUntilSync(self):
7270
    """Poll with custom rpc for disk sync.
7271

7272
    This uses our own step-based rpc call.
7273

7274
    """
7275
    self.feedback_fn("* wait until resync is done")
7276
    all_done = False
7277
    while not all_done:
7278
      all_done = True
7279
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7280
                                            self.nodes_ip,
7281
                                            self.instance.disks)
7282
      min_percent = 100
7283
      for node, nres in result.items():
7284
        nres.Raise("Cannot resync disks on node %s" % node)
7285
        node_done, node_percent = nres.payload
7286
        all_done = all_done and node_done
7287
        if node_percent is not None:
7288
          min_percent = min(min_percent, node_percent)
7289
      if not all_done:
7290
        if min_percent < 100:
7291
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7292
        time.sleep(2)
7293

    
7294
  def _EnsureSecondary(self, node):
7295
    """Demote a node to secondary.
7296

7297
    """
7298
    self.feedback_fn("* switching node %s to secondary mode" % node)
7299

    
7300
    for dev in self.instance.disks:
7301
      self.cfg.SetDiskID(dev, node)
7302

    
7303
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7304
                                          self.instance.disks)
7305
    result.Raise("Cannot change disk to secondary on node %s" % node)
7306

    
7307
  def _GoStandalone(self):
7308
    """Disconnect from the network.
7309

7310
    """
7311
    self.feedback_fn("* changing into standalone mode")
7312
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7313
                                               self.instance.disks)
7314
    for node, nres in result.items():
7315
      nres.Raise("Cannot disconnect disks node %s" % node)
7316

    
7317
  def _GoReconnect(self, multimaster):
7318
    """Reconnect to the network.
7319

7320
    """
7321
    if multimaster:
7322
      msg = "dual-master"
7323
    else:
7324
      msg = "single-master"
7325
    self.feedback_fn("* changing disks into %s mode" % msg)
7326
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7327
                                           self.instance.disks,
7328
                                           self.instance.name, multimaster)
7329
    for node, nres in result.items():
7330
      nres.Raise("Cannot change disks config on node %s" % node)
7331

    
7332
  def _ExecCleanup(self):
7333
    """Try to cleanup after a failed migration.
7334

7335
    The cleanup is done by:
7336
      - check that the instance is running only on one node
7337
        (and update the config if needed)
7338
      - change disks on its secondary node to secondary
7339
      - wait until disks are fully synchronized
7340
      - disconnect from the network
7341
      - change disks into single-master mode
7342
      - wait again until disks are fully synchronized
7343

7344
    """
7345
    instance = self.instance
7346
    target_node = self.target_node
7347
    source_node = self.source_node
7348

    
7349
    # check running on only one node
7350
    self.feedback_fn("* checking where the instance actually runs"
7351
                     " (if this hangs, the hypervisor might be in"
7352
                     " a bad state)")
7353
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7354
    for node, result in ins_l.items():
7355
      result.Raise("Can't contact node %s" % node)
7356

    
7357
    runningon_source = instance.name in ins_l[source_node].payload
7358
    runningon_target = instance.name in ins_l[target_node].payload
7359

    
7360
    if runningon_source and runningon_target:
7361
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7362
                               " or the hypervisor is confused; you will have"
7363
                               " to ensure manually that it runs only on one"
7364
                               " and restart this operation")
7365

    
7366
    if not (runningon_source or runningon_target):
7367
      raise errors.OpExecError("Instance does not seem to be running at all;"
7368
                               " in this case it's safer to repair by"
7369
                               " running 'gnt-instance stop' to ensure disk"
7370
                               " shutdown, and then restarting it")
7371

    
7372
    if runningon_target:
7373
      # the migration has actually succeeded, we need to update the config
7374
      self.feedback_fn("* instance running on secondary node (%s),"
7375
                       " updating config" % target_node)
7376
      instance.primary_node = target_node
7377
      self.cfg.Update(instance, self.feedback_fn)
7378
      demoted_node = source_node
7379
    else:
7380
      self.feedback_fn("* instance confirmed to be running on its"
7381
                       " primary node (%s)" % source_node)
7382
      demoted_node = target_node
7383

    
7384
    if instance.disk_template in constants.DTS_INT_MIRROR:
7385
      self._EnsureSecondary(demoted_node)
7386
      try:
7387
        self._WaitUntilSync()
7388
      except errors.OpExecError:
7389
        # we ignore here errors, since if the device is standalone, it
7390
        # won't be able to sync
7391
        pass
7392
      self._GoStandalone()
7393
      self._GoReconnect(False)
7394
      self._WaitUntilSync()
7395

    
7396
    self.feedback_fn("* done")
7397

    
7398
  def _RevertDiskStatus(self):
7399
    """Try to revert the disk status after a failed migration.
7400

7401
    """
7402
    target_node = self.target_node
7403
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7404
      return
7405

    
7406
    try:
7407
      self._EnsureSecondary(target_node)
7408
      self._GoStandalone()
7409
      self._GoReconnect(False)
7410
      self._WaitUntilSync()
7411
    except errors.OpExecError, err:
7412
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7413
                         " please try to recover the instance manually;"
7414
                         " error '%s'" % str(err))
7415

    
7416
  def _AbortMigration(self):
7417
    """Call the hypervisor code to abort a started migration.
7418

7419
    """
7420
    instance = self.instance
7421
    target_node = self.target_node
7422
    migration_info = self.migration_info
7423

    
7424
    abort_result = self.rpc.call_finalize_migration(target_node,
7425
                                                    instance,
7426
                                                    migration_info,
7427
                                                    False)
7428
    abort_msg = abort_result.fail_msg
7429
    if abort_msg:
7430
      logging.error("Aborting migration failed on target node %s: %s",
7431
                    target_node, abort_msg)
7432
      # Don't raise an exception here, as we still have to try to revert the
7433
      # disk status, even if this step failed.
7434

    
7435
  def _ExecMigration(self):
7436
    """Migrate an instance.
7437

7438
    The migrate is done by:
7439
      - change the disks into dual-master mode
7440
      - wait until disks are fully synchronized again
7441
      - migrate the instance
7442
      - change disks on the new secondary node (the old primary) to secondary
7443
      - wait until disks are fully synchronized
7444
      - change disks into single-master mode
7445

7446
    """
7447
    instance = self.instance
7448
    target_node = self.target_node
7449
    source_node = self.source_node
7450

    
7451
    # Check for hypervisor version mismatch and warn the user.
7452
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7453
                                       None, self.instance.hypervisor)
7454
    src_info = nodeinfo[source_node]
7455
    dst_info = nodeinfo[target_node]
7456

    
7457
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7458
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7459
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7460
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7461
      if src_version != dst_version:
7462
        self.feedback_fn("* warning: hypervisor version mismatch between"
7463
                         " source (%s) and target (%s) node" %
7464
                         (src_version, dst_version))
7465

    
7466
    self.feedback_fn("* checking disk consistency between source and target")
7467
    for dev in instance.disks:
7468
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7469
        raise errors.OpExecError("Disk %s is degraded or not fully"
7470
                                 " synchronized on target node,"
7471
                                 " aborting migration" % dev.iv_name)
7472

    
7473
    # First get the migration information from the remote node
7474
    result = self.rpc.call_migration_info(source_node, instance)
7475
    msg = result.fail_msg
7476
    if msg:
7477
      log_err = ("Failed fetching source migration information from %s: %s" %
7478
                 (source_node, msg))
7479
      logging.error(log_err)
7480
      raise errors.OpExecError(log_err)
7481

    
7482
    self.migration_info = migration_info = result.payload
7483

    
7484
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7485
      # Then switch the disks to master/master mode
7486
      self._EnsureSecondary(target_node)
7487
      self._GoStandalone()
7488
      self._GoReconnect(True)
7489
      self._WaitUntilSync()
7490

    
7491
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7492
    result = self.rpc.call_accept_instance(target_node,
7493
                                           instance,
7494
                                           migration_info,
7495
                                           self.nodes_ip[target_node])
7496

    
7497
    msg = result.fail_msg
7498
    if msg:
7499
      logging.error("Instance pre-migration failed, trying to revert"
7500
                    " disk status: %s", msg)
7501
      self.feedback_fn("Pre-migration failed, aborting")
7502
      self._AbortMigration()
7503
      self._RevertDiskStatus()
7504
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7505
                               (instance.name, msg))
7506

    
7507
    self.feedback_fn("* migrating instance to %s" % target_node)
7508
    result = self.rpc.call_instance_migrate(source_node, instance,
7509
                                            self.nodes_ip[target_node],
7510
                                            self.live)
7511
    msg = result.fail_msg
7512
    if msg:
7513
      logging.error("Instance migration failed, trying to revert"
7514
                    " disk status: %s", msg)
7515
      self.feedback_fn("Migration failed, aborting")
7516
      self._AbortMigration()
7517
      self._RevertDiskStatus()
7518
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7519
                               (instance.name, msg))
7520

    
7521
    instance.primary_node = target_node
7522
    # distribute new instance config to the other nodes
7523
    self.cfg.Update(instance, self.feedback_fn)
7524

    
7525
    result = self.rpc.call_finalize_migration(target_node,
7526
                                              instance,
7527
                                              migration_info,
7528
                                              True)
7529
    msg = result.fail_msg
7530
    if msg:
7531
      logging.error("Instance migration succeeded, but finalization failed:"
7532
                    " %s", msg)
7533
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7534
                               msg)
7535

    
7536
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7537
      self._EnsureSecondary(source_node)
7538
      self._WaitUntilSync()
7539
      self._GoStandalone()
7540
      self._GoReconnect(False)
7541
      self._WaitUntilSync()
7542

    
7543
    self.feedback_fn("* done")
7544

    
7545
  def _ExecFailover(self):
7546
    """Failover an instance.
7547

7548
    The failover is done by shutting it down on its present node and
7549
    starting it on the secondary.
7550

7551
    """
7552
    instance = self.instance
7553
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7554

    
7555
    source_node = instance.primary_node
7556
    target_node = self.target_node
7557

    
7558
    if instance.admin_up:
7559
      self.feedback_fn("* checking disk consistency between source and target")
7560
      for dev in instance.disks:
7561
        # for drbd, these are drbd over lvm
7562
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7563
          if primary_node.offline:
7564
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7565
                             " target node %s" %
7566
                             (primary_node.name, dev.iv_name, target_node))
7567
          elif not self.ignore_consistency:
7568
            raise errors.OpExecError("Disk %s is degraded on target node,"
7569
                                     " aborting failover" % dev.iv_name)
7570
    else:
7571
      self.feedback_fn("* not checking disk consistency as instance is not"
7572
                       " running")
7573

    
7574
    self.feedback_fn("* shutting down instance on source node")
7575
    logging.info("Shutting down instance %s on node %s",
7576
                 instance.name, source_node)
7577

    
7578
    result = self.rpc.call_instance_shutdown(source_node, instance,
7579
                                             self.shutdown_timeout)
7580
    msg = result.fail_msg
7581
    if msg:
7582
      if self.ignore_consistency or primary_node.offline:
7583
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7584
                           " proceeding anyway; please make sure node"
7585
                           " %s is down; error details: %s",
7586
                           instance.name, source_node, source_node, msg)
7587
      else:
7588
        raise errors.OpExecError("Could not shutdown instance %s on"
7589
                                 " node %s: %s" %
7590
                                 (instance.name, source_node, msg))
7591

    
7592
    self.feedback_fn("* deactivating the instance's disks on source node")
7593
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7594
      raise errors.OpExecError("Can't shut down the instance's disks")
7595

    
7596
    instance.primary_node = target_node
7597
    # distribute new instance config to the other nodes
7598
    self.cfg.Update(instance, self.feedback_fn)
7599

    
7600
    # Only start the instance if it's marked as up
7601
    if instance.admin_up:
7602
      self.feedback_fn("* activating the instance's disks on target node %s" %
7603
                       target_node)
7604
      logging.info("Starting instance %s on node %s",
7605
                   instance.name, target_node)
7606

    
7607
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7608
                                           ignore_secondaries=True)
7609
      if not disks_ok:
7610
        _ShutdownInstanceDisks(self.lu, instance)
7611
        raise errors.OpExecError("Can't activate the instance's disks")
7612

    
7613
      self.feedback_fn("* starting the instance on the target node %s" %
7614
                       target_node)
7615
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7616
                                            False)
7617
      msg = result.fail_msg
7618
      if msg:
7619
        _ShutdownInstanceDisks(self.lu, instance)
7620
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7621
                                 (instance.name, target_node, msg))
7622

    
7623
  def Exec(self, feedback_fn):
7624
    """Perform the migration.
7625

7626
    """
7627
    self.feedback_fn = feedback_fn
7628
    self.source_node = self.instance.primary_node
7629

    
7630
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7631
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7632
      self.target_node = self.instance.secondary_nodes[0]
7633
      # Otherwise self.target_node has been populated either
7634
      # directly, or through an iallocator.
7635

    
7636
    self.all_nodes = [self.source_node, self.target_node]
7637
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7638
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7639

    
7640
    if self.failover:
7641
      feedback_fn("Failover instance %s" % self.instance.name)
7642
      self._ExecFailover()
7643
    else:
7644
      feedback_fn("Migrating instance %s" % self.instance.name)
7645

    
7646
      if self.cleanup:
7647
        return self._ExecCleanup()
7648
      else:
7649
        return self._ExecMigration()
7650

    
7651

    
7652
def _CreateBlockDev(lu, node, instance, device, force_create,
7653
                    info, force_open):
7654
  """Create a tree of block devices on a given node.
7655

7656
  If this device type has to be created on secondaries, create it and
7657
  all its children.
7658

7659
  If not, just recurse to children keeping the same 'force' value.
7660

7661
  @param lu: the lu on whose behalf we execute
7662
  @param node: the node on which to create the device
7663
  @type instance: L{objects.Instance}
7664
  @param instance: the instance which owns the device
7665
  @type device: L{objects.Disk}
7666
  @param device: the device to create
7667
  @type force_create: boolean
7668
  @param force_create: whether to force creation of this device; this
7669
      will be changed to True whenever we find a device which has
7670
      CreateOnSecondary() attribute
7671
  @param info: the extra 'metadata' we should attach to the device
7672
      (this will be represented as a LVM tag)
7673
  @type force_open: boolean
7674
  @param force_open: this parameter will be passed to the
7675
      L{backend.BlockdevCreate} function where it specifies
7676
      whether we run on primary or not, and it affects both
7677
      the child assembly and the device's own Open() execution
7678

7679
  """
7680
  if device.CreateOnSecondary():
7681
    force_create = True
7682

    
7683
  if device.children:
7684
    for child in device.children:
7685
      _CreateBlockDev(lu, node, instance, child, force_create,
7686
                      info, force_open)
7687

    
7688
  if not force_create:
7689
    return
7690

    
7691
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7692

    
7693

    
7694
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7695
  """Create a single block device on a given node.
7696

7697
  This will not recurse over children of the device, so they must be
7698
  created in advance.
7699

7700
  @param lu: the lu on whose behalf we execute
7701
  @param node: the node on which to create the device
7702
  @type instance: L{objects.Instance}
7703
  @param instance: the instance which owns the device
7704
  @type device: L{objects.Disk}
7705
  @param device: the device to create
7706
  @param info: the extra 'metadata' we should attach to the device
7707
      (this will be represented as a LVM tag)
7708
  @type force_open: boolean
7709
  @param force_open: this parameter will be passes to the
7710
      L{backend.BlockdevCreate} function where it specifies
7711
      whether we run on primary or not, and it affects both
7712
      the child assembly and the device own Open() execution
7713

7714
  """
7715
  lu.cfg.SetDiskID(device, node)
7716
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7717
                                       instance.name, force_open, info)
7718
  result.Raise("Can't create block device %s on"
7719
               " node %s for instance %s" % (device, node, instance.name))
7720
  if device.physical_id is None:
7721
    device.physical_id = result.payload
7722

    
7723

    
7724
def _GenerateUniqueNames(lu, exts):
7725
  """Generate a suitable LV name.
7726

7727
  This will generate a logical volume name for the given instance.
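
  For example (illustrative only), exts=[".disk0", ".disk1"] would yield
  names such as ["<unique-id>.disk0", "<unique-id>.disk1"], where the
  unique ID is generated by the cluster configuration.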
7728

7729
  """
7730
  results = []
7731
  for val in exts:
7732
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7733
    results.append("%s%s" % (new_id, val))
7734
  return results
7735

    
7736

    
7737
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7738
                         iv_name, p_minor, s_minor):
7739
  """Generate a drbd8 device complete with its children.
7740

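  Illustrative summary of the construction below: the returned LD_DRBD8
  disk carries the allocated port, the two minors and a shared secret in
  its logical_id, and has two LD_LV children, a data LV of the requested
  size and a 128 MiB metadata LV.
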
7741
  """
7742
  assert len(vgnames) == len(names) == 2
7743
  port = lu.cfg.AllocatePort()
7744
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7745
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7746
                          logical_id=(vgnames[0], names[0]))
7747
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7748
                          logical_id=(vgnames[1], names[1]))
7749
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7750
                          logical_id=(primary, secondary, port,
7751
                                      p_minor, s_minor,
7752
                                      shared_secret),
7753
                          children=[dev_data, dev_meta],
7754
                          iv_name=iv_name)
7755
  return drbd_dev
7756

    
7757

    
7758
def _GenerateDiskTemplate(lu, template_name,
7759
                          instance_name, primary_node,
7760
                          secondary_nodes, disk_info,
7761
                          file_storage_dir, file_driver,
7762
                          base_index, feedback_fn):
7763
  """Generate the entire disk layout for a given template type.
7764

7765
  """
7766
  #TODO: compute space requirements
7767

    
7768
  vgname = lu.cfg.GetVGName()
7769
  disk_count = len(disk_info)
7770
  disks = []
7771
  if template_name == constants.DT_DISKLESS:
7772
    pass
7773
  elif template_name == constants.DT_PLAIN:
7774
    if len(secondary_nodes) != 0:
7775
      raise errors.ProgrammerError("Wrong template configuration")
7776

    
7777
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7778
                                      for i in range(disk_count)])
7779
    for idx, disk in enumerate(disk_info):
7780
      disk_index = idx + base_index
7781
      vg = disk.get(constants.IDISK_VG, vgname)
7782
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7783
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7784
                              size=disk[constants.IDISK_SIZE],
7785
                              logical_id=(vg, names[idx]),
7786
                              iv_name="disk/%d" % disk_index,
7787
                              mode=disk[constants.IDISK_MODE])
7788
      disks.append(disk_dev)
7789
  elif template_name == constants.DT_DRBD8:
7790
    if len(secondary_nodes) != 1:
7791
      raise errors.ProgrammerError("Wrong template configuration")
7792
    remote_node = secondary_nodes[0]
7793
    minors = lu.cfg.AllocateDRBDMinor(
7794
      [primary_node, remote_node] * len(disk_info), instance_name)
7795

    
7796
    names = []
7797
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7798
                                               for i in range(disk_count)]):
7799
      names.append(lv_prefix + "_data")
7800
      names.append(lv_prefix + "_meta")
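    # Illustrative result for two disks: names like
    # ["<unique-id>.disk0_data", "<unique-id>.disk0_meta",
    #  "<unique-id>.disk1_data", "<unique-id>.disk1_meta"],
    # consumed in pairs (names[idx * 2:idx * 2 + 2]) below.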
7801
    for idx, disk in enumerate(disk_info):
7802
      disk_index = idx + base_index
7803
      data_vg = disk.get(constants.IDISK_VG, vgname)
7804
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7805
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7806
                                      disk[constants.IDISK_SIZE],
7807
                                      [data_vg, meta_vg],
7808
                                      names[idx * 2:idx * 2 + 2],
7809
                                      "disk/%d" % disk_index,
7810
                                      minors[idx * 2], minors[idx * 2 + 1])
7811
      disk_dev.mode = disk[constants.IDISK_MODE]
7812
      disks.append(disk_dev)
7813
  elif template_name == constants.DT_FILE:
7814
    if len(secondary_nodes) != 0:
7815
      raise errors.ProgrammerError("Wrong template configuration")
7816

    
7817
    opcodes.RequireFileStorage()
7818

    
7819
    for idx, disk in enumerate(disk_info):
7820
      disk_index = idx + base_index
7821
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7822
                              size=disk[constants.IDISK_SIZE],
7823
                              iv_name="disk/%d" % disk_index,
7824
                              logical_id=(file_driver,
7825
                                          "%s/disk%d" % (file_storage_dir,
7826
                                                         disk_index)),
7827
                              mode=disk[constants.IDISK_MODE])
7828
      disks.append(disk_dev)
7829
  elif template_name == constants.DT_SHARED_FILE:
7830
    if len(secondary_nodes) != 0:
7831
      raise errors.ProgrammerError("Wrong template configuration")
7832

    
7833
    opcodes.RequireSharedFileStorage()
7834

    
7835
    for idx, disk in enumerate(disk_info):
7836
      disk_index = idx + base_index
7837
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7838
                              size=disk[constants.IDISK_SIZE],
7839
                              iv_name="disk/%d" % disk_index,
7840
                              logical_id=(file_driver,
7841
                                          "%s/disk%d" % (file_storage_dir,
7842
                                                         disk_index)),
7843
                              mode=disk[constants.IDISK_MODE])
7844
      disks.append(disk_dev)
7845
  elif template_name == constants.DT_BLOCK:
7846
    if len(secondary_nodes) != 0:
7847
      raise errors.ProgrammerError("Wrong template configuration")
7848

    
7849
    for idx, disk in enumerate(disk_info):
7850
      disk_index = idx + base_index
7851
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7852
                              size=disk[constants.IDISK_SIZE],
7853
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7854
                                          disk[constants.IDISK_ADOPT]),
7855
                              iv_name="disk/%d" % disk_index,
7856
                              mode=disk[constants.IDISK_MODE])
7857
      disks.append(disk_dev)
7858

    
7859
  else:
7860
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7861
  return disks
7862

    
7863

    
7864
def _GetInstanceInfoText(instance):
7865
  """Compute that text that should be added to the disk's metadata.
7866

7867
  """
7868
  return "originstname+%s" % instance.name
7869

    
7870

    
7871
def _CalcEta(time_taken, written, total_size):
7872
  """Calculates the ETA based on size written and total size.
7873

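  Example with illustrative numbers: if 300 MiB out of 900 MiB were written
  in 60 seconds, the average is 0.2 s/MiB and the estimated remaining time
  is (900 - 300) * 0.2 = 120 seconds.
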
7874
  @param time_taken: The time taken so far
7875
  @param written: amount written so far
7876
  @param total_size: The total size of data to be written
7877
  @return: The remaining time in seconds
7878

7879
  """
7880
  avg_time = time_taken / float(written)
7881
  return (total_size - written) * avg_time
7882

    
7883

    
7884
def _WipeDisks(lu, instance):
7885
  """Wipes instance disks.
7886

7887
  @type lu: L{LogicalUnit}
7888
  @param lu: the logical unit on whose behalf we execute
7889
  @type instance: L{objects.Instance}
7890
  @param instance: the instance whose disks we should wipe
7891
  @return: the success of the wipe
7892

7893
  """
7894
  node = instance.primary_node
7895

    
7896
  for device in instance.disks:
7897
    lu.cfg.SetDiskID(device, node)
7898

    
7899
  logging.info("Pause sync of instance %s disks", instance.name)
7900
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7901

    
7902
  for idx, success in enumerate(result.payload):
7903
    if not success:
7904
      logging.warn("pause-sync of instance %s for disks %d failed",
7905
                   instance.name, idx)
7906

    
7907
  try:
7908
    for idx, device in enumerate(instance.disks):
7909
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7910
      # MAX_WIPE_CHUNK at max
7911
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7912
                            constants.MIN_WIPE_CHUNK_PERCENT)
7913
      # we _must_ make this an int, otherwise rounding errors will
7914
      # occur
7915
      wipe_chunk_size = int(wipe_chunk_size)
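      # Illustrative arithmetic, assuming MIN_WIPE_CHUNK_PERCENT is 10 and
      # MAX_WIPE_CHUNK is 1024 (MiB): a 2048 MiB disk gives
      # min(1024, 204.8), i.e. 204 MiB chunks, while a 20480 MiB disk is
      # capped at 1024 MiB chunks.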
7916

    
7917
      lu.LogInfo("* Wiping disk %d", idx)
7918
      logging.info("Wiping disk %d for instance %s, node %s using"
7919
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7920

    
7921
      offset = 0
7922
      size = device.size
7923
      last_output = 0
7924
      start_time = time.time()
7925

    
7926
      while offset < size:
7927
        wipe_size = min(wipe_chunk_size, size - offset)
7928
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7929
                      idx, offset, wipe_size)
7930
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7931
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7932
                     (idx, offset, wipe_size))
7933
        now = time.time()
7934
        offset += wipe_size
7935
        if now - last_output >= 60:
7936
          eta = _CalcEta(now - start_time, offset, size)
7937
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7938
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7939
          last_output = now
7940
  finally:
7941
    logging.info("Resume sync of instance %s disks", instance.name)
7942

    
7943
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7944

    
7945
    for idx, success in enumerate(result.payload):
7946
      if not success:
7947
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7948
                      " look at the status and troubleshoot the issue", idx)
7949
        logging.warn("resume-sync of instance %s for disks %d failed",
7950
                     instance.name, idx)
7951

    
7952

    
7953
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7954
  """Create all disks for an instance.
7955

7956
  This abstracts away some work from AddInstance.
7957

7958
  @type lu: L{LogicalUnit}
7959
  @param lu: the logical unit on whose behalf we execute
7960
  @type instance: L{objects.Instance}
7961
  @param instance: the instance whose disks we should create
7962
  @type to_skip: list
7963
  @param to_skip: list of indices to skip
7964
  @type target_node: string
7965
  @param target_node: if passed, overrides the target node for creation
7966
  @rtype: boolean
7967
  @return: the success of the creation
7968

7969
  """
7970
  info = _GetInstanceInfoText(instance)
7971
  if target_node is None:
7972
    pnode = instance.primary_node
7973
    all_nodes = instance.all_nodes
7974
  else:
7975
    pnode = target_node
7976
    all_nodes = [pnode]
7977

    
7978
  if instance.disk_template in constants.DTS_FILEBASED:
7979
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7980
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7981

    
7982
    result.Raise("Failed to create directory '%s' on"
7983
                 " node %s" % (file_storage_dir, pnode))
7984

    
7985
  # Note: this needs to be kept in sync with adding of disks in
7986
  # LUInstanceSetParams
7987
  for idx, device in enumerate(instance.disks):
7988
    if to_skip and idx in to_skip:
7989
      continue
7990
    logging.info("Creating volume %s for instance %s",
7991
                 device.iv_name, instance.name)
7992
    #HARDCODE
7993
    for node in all_nodes:
7994
      f_create = node == pnode
7995
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7996

    
7997

    
7998
def _RemoveDisks(lu, instance, target_node=None):
7999
  """Remove all disks for an instance.
8000

8001
  This abstracts away some work from `AddInstance()` and
8002
  `RemoveInstance()`. Note that in case some of the devices couldn't
8003
  be removed, the removal will continue with the other ones (compare
8004
  with `_CreateDisks()`).
8005

8006
  @type lu: L{LogicalUnit}
8007
  @param lu: the logical unit on whose behalf we execute
8008
  @type instance: L{objects.Instance}
8009
  @param instance: the instance whose disks we should remove
8010
  @type target_node: string
8011
  @param target_node: used to override the node on which to remove the disks
8012
  @rtype: boolean
8013
  @return: the success of the removal
8014

8015
  """
8016
  logging.info("Removing block devices for instance %s", instance.name)
8017

    
8018
  all_result = True
8019
  for device in instance.disks:
8020
    if target_node:
8021
      edata = [(target_node, device)]
8022
    else:
8023
      edata = device.ComputeNodeTree(instance.primary_node)
8024
    for node, disk in edata:
8025
      lu.cfg.SetDiskID(disk, node)
8026
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8027
      if msg:
8028
        lu.LogWarning("Could not remove block device %s on node %s,"
8029
                      " continuing anyway: %s", device.iv_name, node, msg)
8030
        all_result = False
8031

    
8032
    # if this is a DRBD disk, return its port to the pool
8033
    if device.dev_type in constants.LDS_DRBD:
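      # For DRBD8 disks logical_id is (primary, secondary, port, p_minor,
      # s_minor, shared_secret), as built in _GenerateDRBD8Branch; index 2
      # is therefore the TCP/UDP port to return to the pool.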
8034
      tcp_port = device.logical_id[2]
8035
      lu.cfg.AddTcpUdpPort(tcp_port)
8036

    
8037
  if instance.disk_template == constants.DT_FILE:
8038
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8039
    if target_node:
8040
      tgt = target_node
8041
    else:
8042
      tgt = instance.primary_node
8043
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8044
    if result.fail_msg:
8045
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8046
                    file_storage_dir, tgt, result.fail_msg)
8047
      all_result = False
8048

    
8049
  return all_result
8050

    
8051

    
8052
def _ComputeDiskSizePerVG(disk_template, disks):
8053
  """Compute disk size requirements in the volume group
8054

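  Illustrative result: two 1024 MiB disks in a volume group named "xenvg"
  under DT_DRBD8 (which adds 128 MiB of metadata per disk) would yield
  {"xenvg": (1024 + 128) * 2} = {"xenvg": 2304}.
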
8055
  """
8056
  def _compute(disks, payload):
8057
    """Universal algorithm.
8058

8059
    """
8060
    vgs = {}
8061
    for disk in disks:
8062
      vgs[disk[constants.IDISK_VG]] = \
8063
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8064

    
8065
    return vgs
8066

    
8067
  # Required free disk space as a function of disk and swap space
8068
  req_size_dict = {
8069
    constants.DT_DISKLESS: {},
8070
    constants.DT_PLAIN: _compute(disks, 0),
8071
    # 128 MB are added for drbd metadata for each disk
8072
    constants.DT_DRBD8: _compute(disks, 128),
8073
    constants.DT_FILE: {},
8074
    constants.DT_SHARED_FILE: {},
8075
  }
8076

    
8077
  if disk_template not in req_size_dict:
8078
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8079
                                 " is unknown" % disk_template)
8080

    
8081
  return req_size_dict[disk_template]
8082

    
8083

    
8084
def _ComputeDiskSize(disk_template, disks):
8085
  """Compute disk size requirements in the volume group
8086

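  Illustrative totals: for DT_DRBD8 with disks of 1024 MiB and 2048 MiB the
  result is (1024 + 128) + (2048 + 128) = 3328 MiB, while DT_PLAIN with the
  same disks gives 3072 MiB and DT_DISKLESS gives None.
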
8087
  """
8088
  # Required free disk space as a function of disk and swap space
8089
  req_size_dict = {
8090
    constants.DT_DISKLESS: None,
8091
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8092
    # 128 MB are added for drbd metadata for each disk
8093
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8094
    constants.DT_FILE: None,
8095
    constants.DT_SHARED_FILE: 0,
8096
    constants.DT_BLOCK: 0,
8097
  }
8098

    
8099
  if disk_template not in req_size_dict:
8100
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8101
                                 " is unknown" % disk_template)
8102

    
8103
  return req_size_dict[disk_template]
8104

    
8105

    
8106
def _FilterVmNodes(lu, nodenames):
8107
  """Filters out non-vm_capable nodes from a list.
8108

8109
  @type lu: L{LogicalUnit}
8110
  @param lu: the logical unit for which we check
8111
  @type nodenames: list
8112
  @param nodenames: the list of nodes on which we should check
8113
  @rtype: list
8114
  @return: the list of vm-capable nodes
8115

8116
  """
8117
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8118
  return [name for name in nodenames if name not in non_vm_nodes]
8119

    
8120

    
8121
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8122
  """Hypervisor parameter validation.
8123

8124
  This function abstract the hypervisor parameter validation to be
8125
  used in both instance create and instance modify.
8126

8127
  @type lu: L{LogicalUnit}
8128
  @param lu: the logical unit for which we check
8129
  @type nodenames: list
8130
  @param nodenames: the list of nodes on which we should check
8131
  @type hvname: string
8132
  @param hvname: the name of the hypervisor we should use
8133
  @type hvparams: dict
8134
  @param hvparams: the parameters which we need to check
8135
  @raise errors.OpPrereqError: if the parameters are not valid
8136

8137
  """
8138
  nodenames = _FilterVmNodes(lu, nodenames)
8139
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8140
                                                  hvname,
8141
                                                  hvparams)
8142
  for node in nodenames:
8143
    info = hvinfo[node]
8144
    if info.offline:
8145
      continue
8146
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8147

    
8148

    
8149
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8150
  """OS parameters validation.
8151

8152
  @type lu: L{LogicalUnit}
8153
  @param lu: the logical unit for which we check
8154
  @type required: boolean
8155
  @param required: whether the validation should fail if the OS is not
8156
      found
8157
  @type nodenames: list
8158
  @param nodenames: the list of nodes on which we should check
8159
  @type osname: string
8160
  @param osname: the name of the OS we should use
8161
  @type osparams: dict
8162
  @param osparams: the parameters which we need to check
8163
  @raise errors.OpPrereqError: if the parameters are not valid
8164

8165
  """
8166
  nodenames = _FilterVmNodes(lu, nodenames)
8167
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8168
                                   [constants.OS_VALIDATE_PARAMETERS],
8169
                                   osparams)
8170
  for node, nres in result.items():
8171
    # we don't check for offline cases since this should be run only
8172
    # against the master node and/or an instance's nodes
8173
    nres.Raise("OS Parameters validation failed on node %s" % node)
8174
    if not nres.payload:
8175
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8176
                 osname, node)
8177

    
8178

    
8179
class LUInstanceCreate(LogicalUnit):
8180
  """Create an instance.
8181

8182
  """
8183
  HPATH = "instance-add"
8184
  HTYPE = constants.HTYPE_INSTANCE
8185
  REQ_BGL = False
8186

    
8187
  def CheckArguments(self):
8188
    """Check arguments.
8189

8190
    """
8191
    # do not require name_check to ease forward/backward compatibility
8192
    # for tools
8193
    if self.op.no_install and self.op.start:
8194
      self.LogInfo("No-installation mode selected, disabling startup")
8195
      self.op.start = False
8196
    # validate/normalize the instance name
8197
    self.op.instance_name = \
8198
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8199

    
8200
    if self.op.ip_check and not self.op.name_check:
8201
      # TODO: make the ip check more flexible and not depend on the name check
8202
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8203
                                 " check", errors.ECODE_INVAL)
8204

    
8205
    # check nics' parameter names
8206
    for nic in self.op.nics:
8207
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8208

    
8209
    # check disks. parameter names and consistent adopt/no-adopt strategy
8210
    has_adopt = has_no_adopt = False
8211
    for disk in self.op.disks:
8212
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8213
      if constants.IDISK_ADOPT in disk:
8214
        has_adopt = True
8215
      else:
8216
        has_no_adopt = True
8217
    if has_adopt and has_no_adopt:
8218
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8219
                                 errors.ECODE_INVAL)
8220
    if has_adopt:
8221
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8222
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8223
                                   " '%s' disk template" %
8224
                                   self.op.disk_template,
8225
                                   errors.ECODE_INVAL)
8226
      if self.op.iallocator is not None:
8227
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8228
                                   " iallocator script", errors.ECODE_INVAL)
8229
      if self.op.mode == constants.INSTANCE_IMPORT:
8230
        raise errors.OpPrereqError("Disk adoption not allowed for"
8231
                                   " instance import", errors.ECODE_INVAL)
8232
    else:
8233
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8234
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8235
                                   " but no 'adopt' parameter given" %
8236
                                   self.op.disk_template,
8237
                                   errors.ECODE_INVAL)
8238

    
8239
    self.adopt_disks = has_adopt
8240

    
8241
    # instance name verification
8242
    if self.op.name_check:
8243
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8244
      self.op.instance_name = self.hostname1.name
8245
      # used in CheckPrereq for ip ping check
8246
      self.check_ip = self.hostname1.ip
8247
    else:
8248
      self.check_ip = None
8249

    
    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

8430
    """Build hooks env.
8431

8432
    This runs on master, primary and secondary nodes of the instance.
8433

8434
    """
8435
    env = {
8436
      "ADD_MODE": self.op.mode,
8437
      }
8438
    if self.op.mode == constants.INSTANCE_IMPORT:
8439
      env["SRC_NODE"] = self.op.src_node
8440
      env["SRC_PATH"] = self.op.src_path
8441
      env["SRC_IMAGES"] = self.src_images
8442

    
8443
    env.update(_BuildInstanceHookEnv(
8444
      name=self.op.instance_name,
8445
      primary_node=self.op.pnode,
8446
      secondary_nodes=self.secondaries,
8447
      status=self.op.start,
8448
      os_type=self.op.os_type,
8449
      memory=self.be_full[constants.BE_MEMORY],
8450
      vcpus=self.be_full[constants.BE_VCPUS],
8451
      nics=_NICListToTuple(self, self.nics),
8452
      disk_template=self.op.disk_template,
8453
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8454
             for d in self.disks],
8455
      bep=self.be_full,
8456
      hvp=self.hv_full,
8457
      hypervisor_name=self.op.hypervisor,
8458
      tags=self.op.tags,
8459
    ))
8460

    
8461
    return env
8462

    
8463
  def BuildHooksNodes(self):
8464
    """Build hooks nodes.
8465

8466
    """
8467
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8468
    return nl, nl
8469

    
8470
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

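  # Illustrative sketch only (section names come from the constants module,
  # the option names are the ones queried below): the export info consumed
  # by _ReadExportParams roughly contains
  #   [<INISECT_EXP>]  version, os
  #   [<INISECT_INS>]  disk_template, disk_count, disk0_size, ..., nic_count,
  #                    nic0_mac, nic0_ip, ..., tags, hypervisor
  #   [<INISECT_HYP>]  hypervisor parameters, used as defaults
  #   [<INISECT_BEP>]  backend parameters, used as defaults
  #   [<INISECT_OSP>]  OS parameters, used as defaults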
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

8593
    """Revert the instance parameters to the default values.
8594

8595
    """
8596
    # hvparams
8597
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8598
    for name in self.op.hvparams.keys():
8599
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8600
        del self.op.hvparams[name]
8601
    # beparams
8602
    be_defs = cluster.SimpleFillBE({})
8603
    for name in self.op.beparams.keys():
8604
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8605
        del self.op.beparams[name]
8606
    # nic params
8607
    nic_defs = cluster.SimpleFillNIC({})
8608
    for nic in self.op.nics:
8609
      for name in constants.NICS_PARAMETERS:
8610
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8611
          del nic[name]
8612
    # osparams
8613
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8614
    for name in self.op.osparams.keys():
8615
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8616
        del self.op.osparams[name]
8617

    
8618
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

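    # Note on the branches below: without adoption we only check free space
    # per VG; for DT_PLAIN adoption the disks are given as existing "vg/lv"
    # volumes (reserved and looked up on the primary node), and for DT_BLOCK
    # adoption as absolute device paths under ADOPTABLE_BLOCKDEV_ROOT.  In
    # both adoption cases the requested sizes are overwritten with what the
    # node actually reports.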
    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv"; need to ensure that other calls
          # to ReserveLV use the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

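    # Note: for adopted plain (LVM) disks no new volumes are created; the
    # pre-existing LVs are renamed on the primary node to the newly-generated
    # logical IDs.  In all other cases the disks are created from scratch and
    # removed again if creation fails.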
    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

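  # Quick reference for the mode handling in _CheckPrereq2() below:
  #   REPLACE_DISK_PRI  -> target_node = primary,   other_node = secondary
  #   REPLACE_DISK_SEC  -> target_node = secondary, other_node = primary
  #   REPLACE_DISK_CHG  -> new_node = the node given via remote_node or the
  #                        iallocator, target_node = old secondary
  #   REPLACE_DISK_AUTO -> disks and target chosen from whichever node
  #                        reports faulty disks (only one node may be faulty)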
  def CheckPrereq(self):
9479
    """Check prerequisites.
9480

9481
    This checks that the instance is in the cluster.
9482

9483
    """
9484
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9485
    assert instance is not None, \
9486
      "Cannot retrieve locked instance %s" % self.instance_name
9487

    
9488
    if instance.disk_template != constants.DT_DRBD8:
9489
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9490
                                 " instances", errors.ECODE_INVAL)
9491

    
9492
    if len(instance.secondary_nodes) != 1:
9493
      raise errors.OpPrereqError("The instance has a strange layout,"
9494
                                 " expected one secondary but found %d" %
9495
                                 len(instance.secondary_nodes),
9496
                                 errors.ECODE_FAULT)
9497

    
9498
    if not self.delay_iallocator:
9499
      self._CheckPrereq2()
9500

    
9501
  def _CheckPrereq2(self):
9502
    """Check prerequisites, second part.
9503

9504
    This function should always be part of CheckPrereq. It was separated and is
9505
    now called from Exec because during node evacuation iallocator was only
9506
    called with an unmodified cluster model, not taking planned changes into
9507
    account.
9508

9509
    """
9510
    instance = self.instance
9511
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

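  # Note on the role variables set above: target_node holds the node whose
  # storage is being replaced (or retired, for REPLACE_DISK_CHG), other_node
  # is the surviving DRBD peer, and new_node is set only when the secondary
  # itself is being changed (REPLACE_DISK_CHG).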
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

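  # The three helpers below are shared by both replacement paths
  # (_ExecDrbd8DiskOnly and _ExecDrbd8Secondary) and only verify state;
  # they do not modify anything.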
  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

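  # _CreateNewStorage (below) allocates one data and one metadata LV per
  # replaced disk, using the ".disk<N>_data"/".disk<N>_meta" suffixes built
  # in the code; _GenerateUniqueNames is assumed here to turn those suffixes
  # into cluster-unique LV names (it is defined elsewhere in this module).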
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

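  # _ExecDrbd8DiskOnly keeps the current primary/secondary pair and only
  # swaps the backing LVs on self.target_node; _ExecDrbd8Secondary further
  # below instead moves the whole secondary side to self.new_node.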
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

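  # In the method below, self.target_node is the old secondary being
  # retired and self.new_node is its replacement, as prepared by the
  # REPLACE_DISK_CHG branch of the prerequisite check.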
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.instance.primary_node,
                           self.target_node,
                           self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


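# LURepairNodeStorage runs the SO_FIX_CONSISTENCY storage operation on one
# node. From the CLI this is normally reached through something like
#   gnt-node repair-storage NODE lvm-vg VOLUME_GROUP
# (shown for orientation only; consult the gnt-node man page of your
# version for the exact syntax).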
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


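# LUNodeEvacuate does not move instances itself: Exec() returns a
# ResultWithJobs whose job lists (built from an iallocator answer or from
# explicit OpInstanceReplaceDisks opcodes) are submitted afterwards by the
# job processor.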
class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  _MODE2IALLOCATOR = {
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
    }
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
          constants.IALLOCATOR_NEVAC_MODES)

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.NODE_EVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    # Determine nodes (via group) optimistically, needs verification once locks
    # have been acquired
    self.lock_nodes = self._DetermineNodes()

  def _DetermineNodes(self):
    """Gets the list of nodes to operate on.

    """
    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    return set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.NODE_EVAC_MODES

    if self.op.mode == constants.NODE_EVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.NODE_EVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.NODE_EVAC_ALL
      inst_fn = _GetNodeInstances
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
      # per instance
      raise errors.OpPrereqError("Due to an issue with the iallocator"
                                 " interface it is not possible to evacuate"
                                 " all instances at once; specify explicitly"
                                 " whether to evacuate primary or secondary"
                                 " instances",
                                 errors.ECODE_INVAL)

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups for all potential target nodes optimistically, needs
      # verification once nodes have been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    need_nodes = self._DetermineNodes()

    if not owned_nodes.issuperset(need_nodes):
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes are"
                                 " '%s', used to be '%s'; retry the"
                                 " operation" %
                                 (self.op.node_name,
                                  utils.CommaJoin(need_nodes),
                                  utils.CommaJoin(owned_nodes)),
                                 errors.ECODE_STATE)

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s';"
                               " retry the operation" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'; retry the operation" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.NODE_EVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


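# The iallocator result unpacked by _LoadNodeEvacResult below is a triple:
# "moved" contains (name, group, nodes) tuples, "failed" contains
# (name, reason) pairs, and "jobs" is a list of job definitions whose
# opcodes are deserialized via opcodes.OpCode.LoadOpCode.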
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Handles iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


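# LUInstanceGrowDisk issues every grow request twice per node: a first
# dry-run pass so that a failure on any node aborts before anything is
# changed, then the real pass. The usual CLI entry point is something like
#   gnt-instance grow-disk INSTANCE DISK AMOUNT
# (for orientation only; see the gnt-instance man page for exact usage).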
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not instance.admin_up:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif not instance.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


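# In LUInstanceSetParams, op.disks and op.nics are lists of
# (operation, dict) pairs, where the operation is constants.DDM_ADD,
# constants.DDM_REMOVE or the integer index of the item to modify; for
# example (values purely illustrative, sizes are in MiB):
#   disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})]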
class LUInstanceSetParams(LogicalUnit):
10702
  """Modifies an instances's parameters.
10703

10704
  """
10705
  HPATH = "instance-modify"
10706
  HTYPE = constants.HTYPE_INSTANCE
10707
  REQ_BGL = False
10708

    
10709
  def CheckArguments(self):
10710
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10711
            self.op.hvparams or self.op.beparams or self.op.os_name):
10712
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10713

    
10714
    if self.op.hvparams:
10715
      _CheckGlobalHvParams(self.op.hvparams)
10716

    
10717
    # Disk validation
10718
    disk_addremove = 0
10719
    for disk_op, disk_dict in self.op.disks:
10720
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10721
      if disk_op == constants.DDM_REMOVE:
10722
        disk_addremove += 1
10723
        continue
10724
      elif disk_op == constants.DDM_ADD:
10725
        disk_addremove += 1
10726
      else:
10727
        if not isinstance(disk_op, int):
10728
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10729
        if not isinstance(disk_dict, dict):
10730
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10731
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10732

    
10733
      if disk_op == constants.DDM_ADD:
10734
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10735
        if mode not in constants.DISK_ACCESS_SET:
10736
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10737
                                     errors.ECODE_INVAL)
10738
        size = disk_dict.get(constants.IDISK_SIZE, None)
10739
        if size is None:
10740
          raise errors.OpPrereqError("Required disk parameter size missing",
10741
                                     errors.ECODE_INVAL)
10742
        try:
10743
          size = int(size)
10744
        except (TypeError, ValueError), err:
10745
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10746
                                     str(err), errors.ECODE_INVAL)
10747
        disk_dict[constants.IDISK_SIZE] = size
10748
      else:
10749
        # modification of disk
10750
        if constants.IDISK_SIZE in disk_dict:
10751
          raise errors.OpPrereqError("Disk size change not possible, use"
10752
                                     " grow-disk", errors.ECODE_INVAL)
10753

    
10754
    if disk_addremove > 1:
10755
      raise errors.OpPrereqError("Only one disk add or remove operation"
10756
                                 " supported at a time", errors.ECODE_INVAL)
10757

    
10758
    if self.op.disks and self.op.disk_template is not None:
10759
      raise errors.OpPrereqError("Disk template conversion and other disk"
10760
                                 " changes not supported at the same time",
10761
                                 errors.ECODE_INVAL)
10762

    
10763
    if (self.op.disk_template and
10764
        self.op.disk_template in constants.DTS_INT_MIRROR and
10765
        self.op.remote_node is None):
10766
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10767
                                 " one requires specifying a secondary node",
10768
                                 errors.ECODE_INVAL)
10769

    
10770
    # NIC validation
10771
    nic_addremove = 0
10772
    for nic_op, nic_dict in self.op.nics:
10773
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10774
      if nic_op == constants.DDM_REMOVE:
10775
        nic_addremove += 1
10776
        continue
10777
      elif nic_op == constants.DDM_ADD:
10778
        nic_addremove += 1
10779
      else:
10780
        if not isinstance(nic_op, int):
10781
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10782
        if not isinstance(nic_dict, dict):
10783
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10784
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10785

    
10786
      # nic_dict should be a dict
10787
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10788
      if nic_ip is not None:
10789
        if nic_ip.lower() == constants.VALUE_NONE:
10790
          nic_dict[constants.INIC_IP] = None
10791
        else:
10792
          if not netutils.IPAddress.IsValid(nic_ip):
10793
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10794
                                       errors.ECODE_INVAL)
10795

    
10796
      nic_bridge = nic_dict.get("bridge", None)
10797
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10798
      if nic_bridge and nic_link:
10799
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10800
                                   " at the same time", errors.ECODE_INVAL)
10801
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10802
        nic_dict["bridge"] = None
10803
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10804
        nic_dict[constants.INIC_LINK] = None
10805

    
10806
      if nic_op == constants.DDM_ADD:
10807
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10808
        if nic_mac is None:
10809
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10810

    
10811
      if constants.INIC_MAC in nic_dict:
10812
        nic_mac = nic_dict[constants.INIC_MAC]
10813
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10814
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10815

    
10816
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10817
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10818
                                     " modifying an existing nic",
10819
                                     errors.ECODE_INVAL)
10820

    
10821
    if nic_addremove > 1:
10822
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10823
                                 " supported at a time", errors.ECODE_INVAL)
10824

    
10825
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
    be_old = cluster.FillBE(instance)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    return

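  # Note: the two conversion helpers below assume that CheckPrereq has already
  # validated the requested template change against _DISK_CONVERSIONS and, for
  # the plain->drbd direction, that self.op.remote_node has been expanded and
  # locked in DeclareLocks.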
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # this is a DRBD disk, return its port to the pool
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

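  # The helpers above are not called directly; Exec() below dispatches through
  # the _DISK_CONVERSIONS map after shutting down the instance's disks, and
  # releases any reserved DRBD minors if the conversion fails.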
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance,
                                                None, None, False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
12018
  """Logical unit for assigning nodes to groups.
12019

12020
  """
12021
  REQ_BGL = False
12022

    
12023
  def ExpandNames(self):
12024
    # These raise errors.OpPrereqError on their own:
12025
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12026
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12027

    
12028
    # We want to lock all the affected nodes and groups. We have readily
12029
    # available the list of nodes, and the *destination* group. To gather the
12030
    # list of "source" groups, we need to fetch node information later on.
12031
    self.needed_locks = {
12032
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12033
      locking.LEVEL_NODE: self.op.nodes,
12034
      }
12035

    
12036
  def DeclareLocks(self, level):
12037
    if level == locking.LEVEL_NODEGROUP:
12038
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12039

    
12040
      # Try to get all affected nodes' groups without having the group or node
12041
      # lock yet. Needs verification later in the code flow.
12042
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12043

    
12044
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12045

    
12046
  def CheckPrereq(self):
12047
    """Check prerequisites.
12048

12049
    """
12050
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12051
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12052
            frozenset(self.op.nodes))
12053

    
12054
    expected_locks = (set([self.group_uuid]) |
12055
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12056
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12057
    if actual_locks != expected_locks:
12058
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12059
                               " current groups are '%s', used to be '%s'" %
12060
                               (utils.CommaJoin(expected_locks),
12061
                                utils.CommaJoin(actual_locks)))
12062

    
12063
    self.node_data = self.cfg.GetAllNodesInfo()
12064
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12065
    instance_data = self.cfg.GetAllInstancesInfo()
12066

    
12067
    if self.group is None:
12068
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12069
                               (self.op.group_name, self.group_uuid))
12070

    
12071
    (new_splits, previous_splits) = \
12072
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12073
                                             for node in self.op.nodes],
12074
                                            self.node_data, instance_data)
12075

    
12076
    if new_splits:
12077
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12078

    
12079
      if not self.op.force:
12080
        raise errors.OpExecError("The following instances get split by this"
12081
                                 " change and --force was not given: %s" %
12082
                                 fmt_new_splits)
12083
      else:
12084
        self.LogWarning("This operation will split the following instances: %s",
12085
                        fmt_new_splits)
12086

    
12087
        if previous_splits:
12088
          self.LogWarning("In addition, these already-split instances continue"
12089
                          " to be split across groups: %s",
12090
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12091

    
12092
  def Exec(self, feedback_fn):
12093
    """Assign nodes to a new group.
12094

12095
    """
12096
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12097

    
12098
    self.cfg.AssignGroupNodes(mods)
12099

    
12100
  @staticmethod
12101
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12102
    """Check for split instances after a node assignment.
12103

12104
    This method considers a series of node assignments as an atomic operation,
12105
    and returns information about split instances after applying the set of
12106
    changes.
12107

12108
    In particular, it returns information about newly split instances, and
12109
    instances that were already split, and remain so after the change.
12110

12111
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12112
    considered.
12113

12114
    @type changes: list of (node_name, new_group_uuid) pairs.
12115
    @param changes: list of node assignments to consider.
12116
    @param node_data: a dict with data for all nodes
12117
    @param instance_data: a dict with all instances to consider
12118
    @rtype: a two-tuple
12119
    @return: a list of instances that were previously okay and become split as a
      consequence of this change, and a list of instances that were previously
      split and that this change does not fix.
12122

12123
    """
12124
    changed_nodes = dict((node, group) for node, group in changes
12125
                         if node_data[node].group != group)
12126

    
12127
    all_split_instances = set()
12128
    previously_split_instances = set()
12129

    
12130
    def InstanceNodes(instance):
12131
      return [instance.primary_node] + list(instance.secondary_nodes)
12132

    
12133
    for inst in instance_data.values():
12134
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12135
        continue
12136

    
12137
      instance_nodes = InstanceNodes(inst)
12138

    
12139
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12140
        previously_split_instances.add(inst.name)
12141

    
12142
      if len(set(changed_nodes.get(node, node_data[node].group)
12143
                 for node in instance_nodes)) > 1:
12144
        all_split_instances.add(inst.name)
12145

    
12146
    return (list(all_split_instances - previously_split_instances),
12147
            list(previously_split_instances & all_split_instances))
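  # Illustrative sketch (not part of the LU; names are made up): with nodes
  # "n1"/"n2" in group "g1", "n3" in "g2", and a DRBD instance "web" on
  # n1 (primary) / n2 (secondary), reassigning n2 to "g2" newly splits "web":
  #
  #   node_data = {"n1": FakeNode(group="g1"), "n2": FakeNode(group="g1"),
  #                "n3": FakeNode(group="g2")}
  #   instance_data = {"web": FakeInstance(name="web", disk_template="drbd",
  #                                        primary_node="n1",
  #                                        secondary_nodes=["n2"])}
  #   CheckAssignmentForSplitInstances([("n2", "g2")], node_data, instance_data)
  #   # -> (["web"], [])  i.e. newly split, nothing was split beforehand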
12148

    
12149

    
12150
class _GroupQuery(_QueryBase):
12151
  FIELDS = query.GROUP_FIELDS
12152

    
12153
  def ExpandNames(self, lu):
12154
    lu.needed_locks = {}
12155

    
12156
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12157
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12158

    
12159
    if not self.names:
12160
      self.wanted = [name_to_uuid[name]
12161
                     for name in utils.NiceSort(name_to_uuid.keys())]
12162
    else:
12163
      # Accept names to be either names or UUIDs.
12164
      missing = []
12165
      self.wanted = []
12166
      all_uuid = frozenset(self._all_groups.keys())
12167

    
12168
      for name in self.names:
12169
        if name in all_uuid:
12170
          self.wanted.append(name)
12171
        elif name in name_to_uuid:
12172
          self.wanted.append(name_to_uuid[name])
12173
        else:
12174
          missing.append(name)
12175

    
12176
      if missing:
12177
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12178
                                   utils.CommaJoin(missing),
12179
                                   errors.ECODE_NOENT)
12180

    
12181
  def DeclareLocks(self, lu, level):
12182
    pass
12183

    
12184
  def _GetQueryData(self, lu):
12185
    """Computes the list of node groups and their attributes.
12186

12187
    """
12188
    do_nodes = query.GQ_NODE in self.requested_data
12189
    do_instances = query.GQ_INST in self.requested_data
12190

    
12191
    group_to_nodes = None
12192
    group_to_instances = None
12193

    
12194
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12195
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12196
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12197
    # instance->node. Hence, we will need to process nodes even if we only need
12198
    # instance information.
12199
    if do_nodes or do_instances:
12200
      all_nodes = lu.cfg.GetAllNodesInfo()
12201
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12202
      node_to_group = {}
12203

    
12204
      for node in all_nodes.values():
12205
        if node.group in group_to_nodes:
12206
          group_to_nodes[node.group].append(node.name)
12207
          node_to_group[node.name] = node.group
12208

    
12209
      if do_instances:
12210
        all_instances = lu.cfg.GetAllInstancesInfo()
12211
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12212

    
12213
        for instance in all_instances.values():
12214
          node = instance.primary_node
12215
          if node in node_to_group:
12216
            group_to_instances[node_to_group[node]].append(instance.name)
12217

    
12218
        if not do_nodes:
12219
          # Do not pass on node information if it was not requested.
12220
          group_to_nodes = None
12221

    
12222
    return query.GroupQueryData([self._all_groups[uuid]
12223
                                 for uuid in self.wanted],
12224
                                group_to_nodes, group_to_instances)
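  # Shape of the two maps built above (illustrative values): for a single
  # requested group UUID "uuid-1" holding two nodes and one instance,
  #
  #   group_to_nodes     = {"uuid-1": ["node1", "node2"]}
  #   group_to_instances = {"uuid-1": ["inst1"]}
  #
  # Instances are attributed to the group of their primary node, which is why
  # nodes must be processed even when only GQ_INST data was requested.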
12225

    
12226

    
12227
class LUGroupQuery(NoHooksLU):
12228
  """Logical unit for querying node groups.
12229

12230
  """
12231
  REQ_BGL = False
12232

    
12233
  def CheckArguments(self):
12234
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12235
                          self.op.output_fields, False)
12236

    
12237
  def ExpandNames(self):
12238
    self.gq.ExpandNames(self)
12239

    
12240
  def DeclareLocks(self, level):
12241
    self.gq.DeclareLocks(self, level)
12242

    
12243
  def Exec(self, feedback_fn):
12244
    return self.gq.OldStyleQuery(self)
12245

    
12246

    
12247
class LUGroupSetParams(LogicalUnit):
12248
  """Modifies the parameters of a node group.
12249

12250
  """
12251
  HPATH = "group-modify"
12252
  HTYPE = constants.HTYPE_GROUP
12253
  REQ_BGL = False
12254

    
12255
  def CheckArguments(self):
12256
    all_changes = [
12257
      self.op.ndparams,
12258
      self.op.alloc_policy,
12259
      ]
12260

    
12261
    if all_changes.count(None) == len(all_changes):
12262
      raise errors.OpPrereqError("Please pass at least one modification",
12263
                                 errors.ECODE_INVAL)
12264

    
12265
  def ExpandNames(self):
12266
    # This raises errors.OpPrereqError on its own:
12267
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12268

    
12269
    self.needed_locks = {
12270
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12271
      }
12272

    
12273
  def CheckPrereq(self):
12274
    """Check prerequisites.
12275

12276
    """
12277
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12278

    
12279
    if self.group is None:
12280
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12281
                               (self.op.group_name, self.group_uuid))
12282

    
12283
    if self.op.ndparams:
12284
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12285
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12286
      self.new_ndparams = new_ndparams
12287

    
12288
  def BuildHooksEnv(self):
12289
    """Build hooks env.
12290

12291
    """
12292
    return {
12293
      "GROUP_NAME": self.op.group_name,
12294
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12295
      }
12296

    
12297
  def BuildHooksNodes(self):
12298
    """Build hooks nodes.
12299

12300
    """
12301
    mn = self.cfg.GetMasterNode()
12302
    return ([mn], [mn])
12303

    
12304
  def Exec(self, feedback_fn):
12305
    """Modifies the node group.
12306

12307
    """
12308
    result = []
12309

    
12310
    if self.op.ndparams:
12311
      self.group.ndparams = self.new_ndparams
12312
      result.append(("ndparams", str(self.group.ndparams)))
12313

    
12314
    if self.op.alloc_policy:
12315
      self.group.alloc_policy = self.op.alloc_policy
12316

    
12317
    self.cfg.Update(self.group, feedback_fn)
12318
    return result
12319

    
12320

    
12321
class LUGroupRemove(LogicalUnit):
12322
  HPATH = "group-remove"
12323
  HTYPE = constants.HTYPE_GROUP
12324
  REQ_BGL = False
12325

    
12326
  def ExpandNames(self):
12327
    # This raises errors.OpPrereqError on its own:
12328
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12329
    self.needed_locks = {
12330
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12331
      }
12332

    
12333
  def CheckPrereq(self):
12334
    """Check prerequisites.
12335

12336
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12339

12340
    """
12341
    # Verify that the group is empty.
12342
    group_nodes = [node.name
12343
                   for node in self.cfg.GetAllNodesInfo().values()
12344
                   if node.group == self.group_uuid]
12345

    
12346
    if group_nodes:
12347
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12348
                                 " nodes: %s" %
12349
                                 (self.op.group_name,
12350
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12351
                                 errors.ECODE_STATE)
12352

    
12353
    # Verify the cluster would not be left group-less.
12354
    if len(self.cfg.GetNodeGroupList()) == 1:
12355
      raise errors.OpPrereqError("Group '%s' is the only group,"
12356
                                 " cannot be removed" %
12357
                                 self.op.group_name,
12358
                                 errors.ECODE_STATE)
12359

    
12360
  def BuildHooksEnv(self):
12361
    """Build hooks env.
12362

12363
    """
12364
    return {
12365
      "GROUP_NAME": self.op.group_name,
12366
      }
12367

    
12368
  def BuildHooksNodes(self):
12369
    """Build hooks nodes.
12370

12371
    """
12372
    mn = self.cfg.GetMasterNode()
12373
    return ([mn], [mn])
12374

    
12375
  def Exec(self, feedback_fn):
12376
    """Remove the node group.
12377

12378
    """
12379
    try:
12380
      self.cfg.RemoveNodeGroup(self.group_uuid)
12381
    except errors.ConfigurationError:
12382
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12383
                               (self.op.group_name, self.group_uuid))
12384

    
12385
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12386

    
12387

    
12388
class LUGroupRename(LogicalUnit):
12389
  HPATH = "group-rename"
12390
  HTYPE = constants.HTYPE_GROUP
12391
  REQ_BGL = False
12392

    
12393
  def ExpandNames(self):
12394
    # This raises errors.OpPrereqError on its own:
12395
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12396

    
12397
    self.needed_locks = {
12398
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12399
      }
12400

    
12401
  def CheckPrereq(self):
12402
    """Check prerequisites.
12403

12404
    Ensures requested new name is not yet used.
12405

12406
    """
12407
    try:
12408
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12409
    except errors.OpPrereqError:
12410
      pass
12411
    else:
12412
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12413
                                 " node group (UUID: %s)" %
12414
                                 (self.op.new_name, new_name_uuid),
12415
                                 errors.ECODE_EXISTS)
12416

    
12417
  def BuildHooksEnv(self):
12418
    """Build hooks env.
12419

12420
    """
12421
    return {
12422
      "OLD_NAME": self.op.group_name,
12423
      "NEW_NAME": self.op.new_name,
12424
      }
12425

    
12426
  def BuildHooksNodes(self):
12427
    """Build hooks nodes.
12428

12429
    """
12430
    mn = self.cfg.GetMasterNode()
12431

    
12432
    all_nodes = self.cfg.GetAllNodesInfo()
12433
    all_nodes.pop(mn, None)
12434

    
12435
    run_nodes = [mn]
12436
    run_nodes.extend(node.name for node in all_nodes.values()
12437
                     if node.group == self.group_uuid)
12438

    
12439
    return (run_nodes, run_nodes)
12440

    
12441
  def Exec(self, feedback_fn):
12442
    """Rename the node group.
12443

12444
    """
12445
    group = self.cfg.GetNodeGroup(self.group_uuid)
12446

    
12447
    if group is None:
12448
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12449
                               (self.op.group_name, self.group_uuid))
12450

    
12451
    group.name = self.op.new_name
12452
    self.cfg.Update(group, feedback_fn)
12453

    
12454
    return self.op.new_name
12455

    
12456

    
12457
class LUGroupEvacuate(LogicalUnit):
12458
  HPATH = "group-evacuate"
12459
  HTYPE = constants.HTYPE_GROUP
12460
  REQ_BGL = False
12461

    
12462
  def ExpandNames(self):
12463
    # This raises errors.OpPrereqError on its own:
12464
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12465

    
12466
    if self.op.target_groups:
12467
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12468
                                  self.op.target_groups)
12469
    else:
12470
      self.req_target_uuids = []
12471

    
12472
    if self.group_uuid in self.req_target_uuids:
12473
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12474
                                 " as a target group (targets are %s)" %
12475
                                 (self.group_uuid,
12476
                                  utils.CommaJoin(self.req_target_uuids)),
12477
                                 errors.ECODE_INVAL)
12478

    
12479
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12480

    
12481
    self.share_locks = _ShareAll()
12482
    self.needed_locks = {
12483
      locking.LEVEL_INSTANCE: [],
12484
      locking.LEVEL_NODEGROUP: [],
12485
      locking.LEVEL_NODE: [],
12486
      }
12487

    
12488
  def DeclareLocks(self, level):
12489
    if level == locking.LEVEL_INSTANCE:
12490
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12491

    
12492
      # Lock instances optimistically, needs verification once node and group
12493
      # locks have been acquired
12494
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12495
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12496

    
12497
    elif level == locking.LEVEL_NODEGROUP:
12498
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12499

    
12500
      if self.req_target_uuids:
12501
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12502

    
12503
        # Lock all groups used by instances optimistically; this requires going
12504
        # via the node before it's locked, requiring verification later on
12505
        lock_groups.update(group_uuid
12506
                           for instance_name in
12507
                             self.owned_locks(locking.LEVEL_INSTANCE)
12508
                           for group_uuid in
12509
                             self.cfg.GetInstanceNodeGroups(instance_name))
12510
      else:
12511
        # No target groups, need to lock all of them
12512
        lock_groups = locking.ALL_SET
12513

    
12514
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12515

    
12516
    elif level == locking.LEVEL_NODE:
12517
      # This will only lock the nodes in the group to be evacuated which
12518
      # contain actual instances
12519
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12520
      self._LockInstancesNodes()
12521

    
12522
      # Lock all nodes in group to be evacuated and target groups
12523
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12524
      assert self.group_uuid in owned_groups
12525
      member_nodes = [node_name
12526
                      for group in owned_groups
12527
                      for node_name in self.cfg.GetNodeGroup(group).members]
12528
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
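  # The locking above follows the "optimistic" pattern used throughout this
  # module: derive the resources from the (yet unlocked) config, acquire the
  # corresponding locks, and let CheckPrereq re-read the config to detect
  # concurrent changes.  Rough sketch of the idea (hypothetical helper names):
  #
  #   guessed = cfg.GetNodeGroupInstances(group_uuid)  # no locks held yet
  #   acquire(locking.LEVEL_INSTANCE, guessed)
  #   current = cfg.GetNodeGroupInstances(group_uuid)  # re-check under locks
  #   if set(current) != set(guessed):
  #     raise errors.OpPrereqError("...")              # caller must retry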
12529

    
12530
  def CheckPrereq(self):
12531
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12532
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12533
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12534

    
12535
    assert owned_groups.issuperset(self.req_target_uuids)
12536
    assert self.group_uuid in owned_groups
12537

    
12538
    # Check if locked instances are still correct
12539
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12540

    
12541
    # Get instance information
12542
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12543

    
12544
    # Check if node groups for locked instances are still correct
12545
    _CheckInstancesNodeGroups(self.cfg, self.instances,
12546
                              owned_groups, owned_nodes, self.group_uuid)
12547

    
12548
    if self.req_target_uuids:
12549
      # User requested specific target groups
12550
      self.target_uuids = self.req_target_uuids
12551
    else:
12552
      # All groups except the one to be evacuated are potential targets
12553
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12554
                           if group_uuid != self.group_uuid]
12555

    
12556
      if not self.target_uuids:
12557
        raise errors.OpPrereqError("There are no possible target groups",
12558
                                   errors.ECODE_INVAL)
12559

    
12560
  def BuildHooksEnv(self):
12561
    """Build hooks env.
12562

12563
    """
12564
    return {
12565
      "GROUP_NAME": self.op.group_name,
12566
      "TARGET_GROUPS": " ".join(self.target_uuids),
12567
      }
12568

    
12569
  def BuildHooksNodes(self):
12570
    """Build hooks nodes.
12571

12572
    """
12573
    mn = self.cfg.GetMasterNode()
12574

    
12575
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12576

    
12577
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12578

    
12579
    return (run_nodes, run_nodes)
12580

    
12581
  def Exec(self, feedback_fn):
12582
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12583

    
12584
    assert self.group_uuid not in self.target_uuids
12585

    
12586
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12587
                     instances=instances, target_groups=self.target_uuids)
12588

    
12589
    ial.Run(self.op.iallocator)
12590

    
12591
    if not ial.success:
12592
      raise errors.OpPrereqError("Can't compute group evacuation using"
12593
                                 " iallocator '%s': %s" %
12594
                                 (self.op.iallocator, ial.info),
12595
                                 errors.ECODE_NORES)
12596

    
12597
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12598

    
12599
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12600
                 len(jobs), self.op.group_name)
12601

    
12602
    return ResultWithJobs(jobs)
12603

    
12604

    
12605
class TagsLU(NoHooksLU): # pylint: disable=W0223
12606
  """Generic tags LU.
12607

12608
  This is an abstract class which is the parent of all the other tags LUs.
12609

12610
  """
12611
  def ExpandNames(self):
12612
    self.group_uuid = None
12613
    self.needed_locks = {}
12614
    if self.op.kind == constants.TAG_NODE:
12615
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12616
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12617
    elif self.op.kind == constants.TAG_INSTANCE:
12618
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12619
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12620
    elif self.op.kind == constants.TAG_NODEGROUP:
12621
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12622

    
12623
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12624
    # not possible to acquire the BGL based on opcode parameters)
12625

    
12626
  def CheckPrereq(self):
12627
    """Check prerequisites.
12628

12629
    """
12630
    if self.op.kind == constants.TAG_CLUSTER:
12631
      self.target = self.cfg.GetClusterInfo()
12632
    elif self.op.kind == constants.TAG_NODE:
12633
      self.target = self.cfg.GetNodeInfo(self.op.name)
12634
    elif self.op.kind == constants.TAG_INSTANCE:
12635
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12636
    elif self.op.kind == constants.TAG_NODEGROUP:
12637
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12638
    else:
12639
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12640
                                 str(self.op.kind), errors.ECODE_INVAL)
12641

    
12642

    
12643
class LUTagsGet(TagsLU):
12644
  """Returns the tags of a given object.
12645

12646
  """
12647
  REQ_BGL = False
12648

    
12649
  def ExpandNames(self):
12650
    TagsLU.ExpandNames(self)
12651

    
12652
    # Share locks as this is only a read operation
12653
    self.share_locks = _ShareAll()
12654

    
12655
  def Exec(self, feedback_fn):
12656
    """Returns the tag list.
12657

12658
    """
12659
    return list(self.target.GetTags())
12660

    
12661

    
12662
class LUTagsSearch(NoHooksLU):
12663
  """Searches the tags for a given pattern.
12664

12665
  """
12666
  REQ_BGL = False
12667

    
12668
  def ExpandNames(self):
12669
    self.needed_locks = {}
12670

    
12671
  def CheckPrereq(self):
12672
    """Check prerequisites.
12673

12674
    This checks the pattern passed for validity by compiling it.
12675

12676
    """
12677
    try:
12678
      self.re = re.compile(self.op.pattern)
12679
    except re.error, err:
12680
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12681
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12682

    
12683
  def Exec(self, feedback_fn):
12684
    """Returns the tag list.
12685

12686
    """
12687
    cfg = self.cfg
12688
    tgts = [("/cluster", cfg.GetClusterInfo())]
12689
    ilist = cfg.GetAllInstancesInfo().values()
12690
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12691
    nlist = cfg.GetAllNodesInfo().values()
12692
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12693
    tgts.extend(("/nodegroup/%s" % n.name, n)
12694
                for n in cfg.GetAllNodeGroupsInfo().values())
12695
    results = []
12696
    for path, target in tgts:
12697
      for tag in target.GetTags():
12698
        if self.re.search(tag):
12699
          results.append((path, tag))
12700
    return results
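  # Result shape (illustrative values): searching for "^db" on a cluster where
  # instance "inst1" carries the tag "dbserver" would return
  #
  #   [("/instances/inst1", "dbserver")]
  #
  # i.e. (path, tag) pairs collected over the cluster, instances, nodes and
  # node groups.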
12701

    
12702

    
12703
class LUTagsSet(TagsLU):
12704
  """Sets a tag on a given object.
12705

12706
  """
12707
  REQ_BGL = False
12708

    
12709
  def CheckPrereq(self):
12710
    """Check prerequisites.
12711

12712
    This checks the type and length of the tag name and value.
12713

12714
    """
12715
    TagsLU.CheckPrereq(self)
12716
    for tag in self.op.tags:
12717
      objects.TaggableObject.ValidateTag(tag)
12718

    
12719
  def Exec(self, feedback_fn):
12720
    """Sets the tag.
12721

12722
    """
12723
    try:
12724
      for tag in self.op.tags:
12725
        self.target.AddTag(tag)
12726
    except errors.TagError, err:
12727
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12728
    self.cfg.Update(self.target, feedback_fn)
12729

    
12730

    
12731
class LUTagsDel(TagsLU):
12732
  """Delete a list of tags from a given object.
12733

12734
  """
12735
  REQ_BGL = False
12736

    
12737
  def CheckPrereq(self):
12738
    """Check prerequisites.
12739

12740
    This checks that we have the given tag.
12741

12742
    """
12743
    TagsLU.CheckPrereq(self)
12744
    for tag in self.op.tags:
12745
      objects.TaggableObject.ValidateTag(tag)
12746
    del_tags = frozenset(self.op.tags)
12747
    cur_tags = self.target.GetTags()
12748

    
12749
    diff_tags = del_tags - cur_tags
12750
    if diff_tags:
12751
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12752
      raise errors.OpPrereqError("Tag(s) %s not found" %
12753
                                 (utils.CommaJoin(diff_names), ),
12754
                                 errors.ECODE_NOENT)
12755

    
12756
  def Exec(self, feedback_fn):
12757
    """Remove the tag from the object.
12758

12759
    """
12760
    for tag in self.op.tags:
12761
      self.target.RemoveTag(tag)
12762
    self.cfg.Update(self.target, feedback_fn)
12763

    
12764

    
12765
class LUTestDelay(NoHooksLU):
12766
  """Sleep for a specified amount of time.
12767

12768
  This LU sleeps on the master and/or nodes for a specified amount of
12769
  time.
12770

12771
  """
12772
  REQ_BGL = False
12773

    
12774
  def ExpandNames(self):
12775
    """Expand names and set required locks.
12776

12777
    This expands the node list, if any.
12778

12779
    """
12780
    self.needed_locks = {}
12781
    if self.op.on_nodes:
12782
      # _GetWantedNodes can be used here, but is not always appropriate to use
12783
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12784
      # more information.
12785
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12786
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12787

    
12788
  def _TestDelay(self):
12789
    """Do the actual sleep.
12790

12791
    """
12792
    if self.op.on_master:
12793
      if not utils.TestDelay(self.op.duration):
12794
        raise errors.OpExecError("Error during master delay test")
12795
    if self.op.on_nodes:
12796
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12797
      for node, node_result in result.items():
12798
        node_result.Raise("Failure during rpc call to node %s" % node)
12799

    
12800
  def Exec(self, feedback_fn):
12801
    """Execute the test delay opcode, with the wanted repetitions.
12802

12803
    """
12804
    if self.op.repeat == 0:
12805
      self._TestDelay()
12806
    else:
12807
      top_value = self.op.repeat - 1
12808
      for i in range(self.op.repeat):
12809
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12810
        self._TestDelay()
12811

    
12812

    
12813
class LUTestJqueue(NoHooksLU):
12814
  """Utility LU to test some aspects of the job queue.
12815

12816
  """
12817
  REQ_BGL = False
12818

    
12819
  # Must be lower than default timeout for WaitForJobChange to see whether it
12820
  # notices changed jobs
12821
  _CLIENT_CONNECT_TIMEOUT = 20.0
12822
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12823

    
12824
  @classmethod
12825
  def _NotifyUsingSocket(cls, cb, errcls):
12826
    """Opens a Unix socket and waits for another program to connect.
12827

12828
    @type cb: callable
12829
    @param cb: Callback to send socket name to client
12830
    @type errcls: class
12831
    @param errcls: Exception class to use for errors
12832

12833
    """
12834
    # Using a temporary directory as there's no easy way to create temporary
12835
    # sockets without writing a custom loop around tempfile.mktemp and
12836
    # socket.bind
12837
    tmpdir = tempfile.mkdtemp()
12838
    try:
12839
      tmpsock = utils.PathJoin(tmpdir, "sock")
12840

    
12841
      logging.debug("Creating temporary socket at %s", tmpsock)
12842
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12843
      try:
12844
        sock.bind(tmpsock)
12845
        sock.listen(1)
12846

    
12847
        # Send details to client
12848
        cb(tmpsock)
12849

    
12850
        # Wait for client to connect before continuing
12851
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12852
        try:
12853
          (conn, _) = sock.accept()
12854
        except socket.error, err:
12855
          raise errcls("Client didn't connect in time (%s)" % err)
12856
      finally:
12857
        sock.close()
12858
    finally:
12859
      # Remove as soon as client is connected
12860
      shutil.rmtree(tmpdir)
12861

    
12862
    # Wait for client to close
12863
    try:
12864
      try:
12865
        # pylint: disable=E1101
12866
        # Instance of '_socketobject' has no ... member
12867
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12868
        conn.recv(1)
12869
      except socket.error, err:
12870
        raise errcls("Client failed to confirm notification (%s)" % err)
12871
    finally:
12872
      conn.close()
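  # Client-side counterpart of the handshake above (illustrative sketch only;
  # the real test client lives outside this module): connect to the announced
  # socket path, then send a single byte to confirm the notification.
  #
  #   import socket
  #   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   s.connect(sockname)  # within _CLIENT_CONNECT_TIMEOUT of the callback
  #   s.send("x")          # confirmation, within _CLIENT_CONFIRM_TIMEOUT
  #   s.close()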
12873

    
12874
  def _SendNotification(self, test, arg, sockname):
12875
    """Sends a notification to the client.
12876

12877
    @type test: string
12878
    @param test: Test name
12879
    @param arg: Test argument (depends on test)
12880
    @type sockname: string
12881
    @param sockname: Socket path
12882

12883
    """
12884
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12885

    
12886
  def _Notify(self, prereq, test, arg):
12887
    """Notifies the client of a test.
12888

12889
    @type prereq: bool
12890
    @param prereq: Whether this is a prereq-phase test
12891
    @type test: string
12892
    @param test: Test name
12893
    @param arg: Test argument (depends on test)
12894

12895
    """
12896
    if prereq:
12897
      errcls = errors.OpPrereqError
12898
    else:
12899
      errcls = errors.OpExecError
12900

    
12901
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12902
                                                  test, arg),
12903
                                   errcls)
12904

    
12905
  def CheckArguments(self):
12906
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12907
    self.expandnames_calls = 0
12908

    
12909
  def ExpandNames(self):
12910
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12911
    if checkargs_calls < 1:
12912
      raise errors.ProgrammerError("CheckArguments was not called")
12913

    
12914
    self.expandnames_calls += 1
12915

    
12916
    if self.op.notify_waitlock:
12917
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12918

    
12919
    self.LogInfo("Expanding names")
12920

    
12921
    # Get lock on master node (just to get a lock, not for a particular reason)
12922
    self.needed_locks = {
12923
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12924
      }
12925

    
12926
  def Exec(self, feedback_fn):
12927
    if self.expandnames_calls < 1:
12928
      raise errors.ProgrammerError("ExpandNames was not called")
12929

    
12930
    if self.op.notify_exec:
12931
      self._Notify(False, constants.JQT_EXEC, None)
12932

    
12933
    self.LogInfo("Executing")
12934

    
12935
    if self.op.log_messages:
12936
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12937
      for idx, msg in enumerate(self.op.log_messages):
12938
        self.LogInfo("Sending log message %s", idx + 1)
12939
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12940
        # Report how many test messages have been sent
12941
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12942

    
12943
    if self.op.fail:
12944
      raise errors.OpExecError("Opcode failure was requested")
12945

    
12946
    return True
12947

    
12948

    
12949
class IAllocator(object):
12950
  """IAllocator framework.
12951

12952
  An IAllocator instance has several sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all keys required by the selected mode in _MODE_DATA)
12955
    - four buffer attributes (in|out_data|text), that represent the
12956
      input (to the external script) in text and data structure format,
12957
      and the output from it, again in two formats
12958
    - the result variables from the script (success, info, nodes) for
12959
      easy usage
12960

12961
  """
12962
  # pylint: disable=R0902
12963
  # lots of instance attributes
12964

    
12965
  def __init__(self, cfg, rpc, mode, **kwargs):
12966
    self.cfg = cfg
12967
    self.rpc = rpc
12968
    # init buffer variables
12969
    self.in_text = self.out_text = self.in_data = self.out_data = None
12970
    # init all input fields so that pylint is happy
12971
    self.mode = mode
12972
    self.memory = self.disks = self.disk_template = None
12973
    self.os = self.tags = self.nics = self.vcpus = None
12974
    self.hypervisor = None
12975
    self.relocate_from = None
12976
    self.name = None
12977
    self.instances = None
12978
    self.evac_mode = None
12979
    self.target_groups = []
12980
    # computed fields
12981
    self.required_nodes = None
12982
    # init result fields
12983
    self.success = self.info = self.result = None
12984

    
12985
    try:
12986
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12987
    except KeyError:
12988
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12989
                                   " IAllocator" % self.mode)
12990

    
12991
    keyset = [n for (n, _) in keydata]
12992

    
12993
    for key in kwargs:
12994
      if key not in keyset:
12995
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
12996
                                     " IAllocator" % key)
12997
      setattr(self, key, kwargs[key])
12998

    
12999
    for key in keyset:
13000
      if key not in kwargs:
13001
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13002
                                     " IAllocator" % key)
13003
    self._BuildInputData(compat.partial(fn, self), keydata)
13004

    
13005
  def _ComputeClusterData(self):
13006
    """Compute the generic allocator input data.
13007

13008
    This is the data that is independent of the actual operation.
13009

13010
    """
13011
    cfg = self.cfg
13012
    cluster_info = cfg.GetClusterInfo()
13013
    # cluster data
13014
    data = {
13015
      "version": constants.IALLOCATOR_VERSION,
13016
      "cluster_name": cfg.GetClusterName(),
13017
      "cluster_tags": list(cluster_info.GetTags()),
13018
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13019
      # we don't have job IDs
13020
      }
13021
    ninfo = cfg.GetAllNodesInfo()
13022
    iinfo = cfg.GetAllInstancesInfo().values()
13023
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13024

    
13025
    # node data
13026
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13027

    
13028
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13029
      hypervisor_name = self.hypervisor
13030
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13031
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13032
    else:
13033
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13034

    
13035
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13036
                                        hypervisor_name)
13037
    node_iinfo = \
13038
      self.rpc.call_all_instances_info(node_list,
13039
                                       cluster_info.enabled_hypervisors)
13040

    
13041
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13042

    
13043
    config_ndata = self._ComputeBasicNodeData(ninfo)
13044
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13045
                                                 i_list, config_ndata)
13046
    assert len(data["nodes"]) == len(ninfo), \
13047
        "Incomplete node data computed"
13048

    
13049
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13050

    
13051
    self.in_data = data
13052

    
13053
  @staticmethod
13054
  def _ComputeNodeGroupData(cfg):
13055
    """Compute node groups data.
13056

13057
    """
13058
    ng = dict((guuid, {
13059
      "name": gdata.name,
13060
      "alloc_policy": gdata.alloc_policy,
13061
      })
13062
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13063

    
13064
    return ng
13065

    
13066
  @staticmethod
13067
  def _ComputeBasicNodeData(node_cfg):
13068
    """Compute global node data.
13069

13070
    @rtype: dict
13071
    @return: a dict mapping node names to dicts of static node attributes
13072

13073
    """
13074
    # fill in static (config-based) values
13075
    node_results = dict((ninfo.name, {
13076
      "tags": list(ninfo.GetTags()),
13077
      "primary_ip": ninfo.primary_ip,
13078
      "secondary_ip": ninfo.secondary_ip,
13079
      "offline": ninfo.offline,
13080
      "drained": ninfo.drained,
13081
      "master_candidate": ninfo.master_candidate,
13082
      "group": ninfo.group,
13083
      "master_capable": ninfo.master_capable,
13084
      "vm_capable": ninfo.vm_capable,
13085
      })
13086
      for ninfo in node_cfg.values())
13087

    
13088
    return node_results
13089

    
13090
  @staticmethod
13091
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13092
                              node_results):
13093
    """Compute global node data.
13094

13095
    @param node_results: the basic node structures as filled from the config
13096

13097
    """
13098
    # make a copy of the current dict
13099
    node_results = dict(node_results)
13100
    for nname, nresult in node_data.items():
13101
      assert nname in node_results, "Missing basic data for node %s" % nname
13102
      ninfo = node_cfg[nname]
13103

    
13104
      if not (ninfo.offline or ninfo.drained):
13105
        nresult.Raise("Can't get data for node %s" % nname)
13106
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13107
                                nname)
13108
        remote_info = nresult.payload
13109

    
13110
        for attr in ["memory_total", "memory_free", "memory_dom0",
13111
                     "vg_size", "vg_free", "cpu_total"]:
13112
          if attr not in remote_info:
13113
            raise errors.OpExecError("Node '%s' didn't return attribute"
13114
                                     " '%s'" % (nname, attr))
13115
          if not isinstance(remote_info[attr], int):
13116
            raise errors.OpExecError("Node '%s' returned invalid value"
13117
                                     " for '%s': %s" %
13118
                                     (nname, attr, remote_info[attr]))
13119
        # compute memory used by primary instances
13120
        i_p_mem = i_p_up_mem = 0
13121
        for iinfo, beinfo in i_list:
13122
          if iinfo.primary_node == nname:
13123
            i_p_mem += beinfo[constants.BE_MEMORY]
13124
            if iinfo.name not in node_iinfo[nname].payload:
13125
              i_used_mem = 0
13126
            else:
13127
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13128
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13129
            remote_info["memory_free"] -= max(0, i_mem_diff)
13130

    
13131
            if iinfo.admin_up:
13132
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13133

    
13134
        # compute memory used by instances
13135
        pnr_dyn = {
13136
          "total_memory": remote_info["memory_total"],
13137
          "reserved_memory": remote_info["memory_dom0"],
13138
          "free_memory": remote_info["memory_free"],
13139
          "total_disk": remote_info["vg_size"],
13140
          "free_disk": remote_info["vg_free"],
13141
          "total_cpus": remote_info["cpu_total"],
13142
          "i_pri_memory": i_p_mem,
13143
          "i_pri_up_memory": i_p_up_mem,
13144
          }
13145
        pnr_dyn.update(node_results[nname])
13146
        node_results[nname] = pnr_dyn
13147

    
13148
    return node_results
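  # Worked example of the memory adjustment above (numbers invented): an
  # instance with BE_MEMORY = 1024 that the hypervisor currently reports as
  # using only 512 MiB charges the node for the full backend value:
  #
  #   i_mem_diff = 1024 - 512                    # = 512
  #   remote_info["memory_free"] -= max(0, 512)  # free_memory drops by 512
  #
  # so "free_memory" reflects what would remain if every primary instance grew
  # back to its configured memory size.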
13149

    
13150
  @staticmethod
13151
  def _ComputeInstanceData(cluster_info, i_list):
13152
    """Compute global instance data.
13153

13154
    """
13155
    instance_data = {}
13156
    for iinfo, beinfo in i_list:
13157
      nic_data = []
13158
      for nic in iinfo.nics:
13159
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13160
        nic_dict = {
13161
          "mac": nic.mac,
13162
          "ip": nic.ip,
13163
          "mode": filled_params[constants.NIC_MODE],
13164
          "link": filled_params[constants.NIC_LINK],
13165
          }
13166
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13167
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13168
        nic_data.append(nic_dict)
13169
      pir = {
13170
        "tags": list(iinfo.GetTags()),
13171
        "admin_up": iinfo.admin_up,
13172
        "vcpus": beinfo[constants.BE_VCPUS],
13173
        "memory": beinfo[constants.BE_MEMORY],
13174
        "os": iinfo.os,
13175
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13176
        "nics": nic_data,
13177
        "disks": [{constants.IDISK_SIZE: dsk.size,
13178
                   constants.IDISK_MODE: dsk.mode}
13179
                  for dsk in iinfo.disks],
13180
        "disk_template": iinfo.disk_template,
13181
        "hypervisor": iinfo.hypervisor,
13182
        }
13183
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13184
                                                 pir["disks"])
13185
      instance_data[iinfo.name] = pir
13186

    
13187
    return instance_data
13188

    
13189
  def _AddNewInstance(self):
13190
    """Add new instance data to allocator structure.
13191

13192
    This in combination with _ComputeClusterData will create the
13193
    correct structure needed as input for the allocator.
13194

13195
    The checks for the completeness of the opcode must have already been
13196
    done.
13197

13198
    """
13199
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13200

    
13201
    if self.disk_template in constants.DTS_INT_MIRROR:
13202
      self.required_nodes = 2
13203
    else:
13204
      self.required_nodes = 1
13205

    
13206
    request = {
13207
      "name": self.name,
13208
      "disk_template": self.disk_template,
13209
      "tags": self.tags,
13210
      "os": self.os,
13211
      "vcpus": self.vcpus,
13212
      "memory": self.memory,
13213
      "disks": self.disks,
13214
      "disk_space_total": disk_space,
13215
      "nics": self.nics,
13216
      "required_nodes": self.required_nodes,
13217
      "hypervisor": self.hypervisor,
13218
      }
13219

    
13220
    return request
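  # Illustrative request as built above for a DRBD instance (values invented;
  # the "type" key is added later by _BuildInputData):
  #
  #   {"name": "inst1.example.com", "disk_template": "drbd",
  #    "required_nodes": 2, "memory": 1024, "vcpus": 1, "os": "debian-image",
  #    "disks": [{"size": 10240, "mode": "rw"}], "disk_space_total": 10368,
  #    "nics": [{"mac": "auto", "ip": None, "mode": "bridged"}], "tags": [],
  #    "hypervisor": "kvm"}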
13221

    
13222
  def _AddRelocateInstance(self):
13223
    """Add relocate instance data to allocator structure.
13224

13225
    This in combination with _ComputeClusterData will create the
13226
    correct structure needed as input for the allocator.
13227

13228
    The checks for the completeness of the opcode must have already been
13229
    done.
13230

13231
    """
13232
    instance = self.cfg.GetInstanceInfo(self.name)
13233
    if instance is None:
13234
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13235
                                   " IAllocator" % self.name)
13236

    
13237
    if instance.disk_template not in constants.DTS_MIRRORED:
13238
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13239
                                 errors.ECODE_INVAL)
13240

    
13241
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13242
        len(instance.secondary_nodes) != 1:
13243
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13244
                                 errors.ECODE_STATE)
13245

    
13246
    self.required_nodes = 1
13247
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13248
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13249

    
13250
    request = {
13251
      "name": self.name,
13252
      "disk_space_total": disk_space,
13253
      "required_nodes": self.required_nodes,
13254
      "relocate_from": self.relocate_from,
13255
      }
13256
    return request
13257

    
13258
  def _AddNodeEvacuate(self):
13259
    """Get data for node-evacuate requests.
13260

13261
    """
13262
    return {
13263
      "instances": self.instances,
13264
      "evac_mode": self.evac_mode,
13265
      }
13266

    
13267
  def _AddChangeGroup(self):
13268
    """Get data for node-evacuate requests.
13269

13270
    """
13271
    return {
13272
      "instances": self.instances,
13273
      "target_groups": self.target_groups,
13274
      }
13275

    
13276
  def _BuildInputData(self, fn, keydata):
13277
    """Build input data structures.
13278

13279
    """
13280
    self._ComputeClusterData()
13281

    
13282
    request = fn()
13283
    request["type"] = self.mode
13284
    for keyname, keytype in keydata:
13285
      if keyname not in request:
13286
        raise errors.ProgrammerError("Request parameter %s is missing" %
13287
                                     keyname)
13288
      val = request[keyname]
13289
      if not keytype(val):
13290
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13291
                                     " validation, value %s, expected"
13292
                                     " type %s" % (keyname, val, keytype))
13293
    self.in_data["request"] = request
13294

    
13295
    self.in_text = serializer.Dump(self.in_data)
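  # Overall shape of the serialized text handed to the iallocator script (an
  # abridged sketch; the iallocator design document is authoritative):
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "...", "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodegroups": {...}, "nodes": {...}, "instances": {...},
  #    "request": {"type": <mode>, ...}}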
13296

    
13297
  _STRING_LIST = ht.TListOf(ht.TString)
13298
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13299
     # pylint: disable=E1101
13300
     # Class '...' has no 'OP_ID' member
13301
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13302
                          opcodes.OpInstanceMigrate.OP_ID,
13303
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13304
     })))
13305

    
13306
  _NEVAC_MOVED = \
13307
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13308
                       ht.TItems([ht.TNonEmptyString,
13309
                                  ht.TNonEmptyString,
13310
                                  ht.TListOf(ht.TNonEmptyString),
13311
                                 ])))
13312
  _NEVAC_FAILED = \
13313
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13314
                       ht.TItems([ht.TNonEmptyString,
13315
                                  ht.TMaybeString,
13316
                                 ])))
13317
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13318
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13319

    
13320
  _MODE_DATA = {
13321
    constants.IALLOCATOR_MODE_ALLOC:
13322
      (_AddNewInstance,
13323
       [
13324
        ("name", ht.TString),
13325
        ("memory", ht.TInt),
13326
        ("disks", ht.TListOf(ht.TDict)),
13327
        ("disk_template", ht.TString),
13328
        ("os", ht.TString),
13329
        ("tags", _STRING_LIST),
13330
        ("nics", ht.TListOf(ht.TDict)),
13331
        ("vcpus", ht.TInt),
13332
        ("hypervisor", ht.TString),
13333
        ], ht.TList),
13334
    constants.IALLOCATOR_MODE_RELOC:
13335
      (_AddRelocateInstance,
13336
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13337
       ht.TList),
13338
     constants.IALLOCATOR_MODE_NODE_EVAC:
13339
      (_AddNodeEvacuate, [
13340
        ("instances", _STRING_LIST),
13341
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13342
        ], _NEVAC_RESULT),
13343
     constants.IALLOCATOR_MODE_CHG_GROUP:
13344
      (_AddChangeGroup, [
13345
        ("instances", _STRING_LIST),
13346
        ("target_groups", _STRING_LIST),
13347
        ], _NEVAC_RESULT),
13348
    }
13349

    
13350
  def Run(self, name, validate=True, call_fn=None):
13351
    """Run an instance allocator and return the results.
13352

13353
    """
13354
    if call_fn is None:
13355
      call_fn = self.rpc.call_iallocator_runner
13356

    
13357
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13358
    result.Raise("Failure while running the iallocator script")
13359

    
13360
    self.out_text = result.payload
13361
    if validate:
13362
      self._ValidateResult()
13363

    
13364
  def _ValidateResult(self):
13365
    """Process the allocator results.
13366

13367
    This will process and, if successful, save the result in
    self.out_data and the other result attributes.
13369

13370
    """
13371
    try:
13372
      rdict = serializer.Load(self.out_text)
13373
    except Exception, err:
13374
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13375

    
13376
    if not isinstance(rdict, dict):
13377
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13378

    
13379
    # TODO: remove backwards compatibility in later versions
13380
    if "nodes" in rdict and "result" not in rdict:
13381
      rdict["result"] = rdict["nodes"]
13382
      del rdict["nodes"]
13383

    
13384
    for key in "success", "info", "result":
13385
      if key not in rdict:
13386
        raise errors.OpExecError("Can't parse iallocator results:"
13387
                                 " missing key '%s'" % key)
13388
      setattr(self, key, rdict[key])
13389

    
13390
    if not self._result_check(self.result):
13391
      raise errors.OpExecError("Iallocator returned invalid result,"
13392
                               " expected %s, got %s" %
13393
                               (self._result_check, self.result),
13394
                               errors.ECODE_INVAL)
13395

    
13396
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13397
      assert self.relocate_from is not None
13398
      assert self.required_nodes == 1
13399

    
13400
      node2group = dict((name, ndata["group"])
13401
                        for (name, ndata) in self.in_data["nodes"].items())
13402

    
13403
      fn = compat.partial(self._NodesToGroups, node2group,
13404
                          self.in_data["nodegroups"])
13405

    
13406
      instance = self.cfg.GetInstanceInfo(self.name)
13407
      request_groups = fn(self.relocate_from + [instance.primary_node])
13408
      result_groups = fn(rdict["result"] + [instance.primary_node])
13409

    
13410
      if self.success and not set(result_groups).issubset(request_groups):
13411
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13412
                                 " differ from original groups (%s)" %
13413
                                 (utils.CommaJoin(result_groups),
13414
                                  utils.CommaJoin(request_groups)))
13415

    
13416
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13417
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13418

    
13419
    self.out_data = rdict
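  # Minimal reply that passes the checks above for an allocation request
  # (values invented):
  #
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  #
  # Replies still using the legacy "nodes" key are renamed to "result" by the
  # backwards-compatibility block above before validation.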
13420

    
13421
  @staticmethod
13422
  def _NodesToGroups(node2group, groups, nodes):
13423
    """Returns a list of unique group names for a list of nodes.
13424

13425
    @type node2group: dict
13426
    @param node2group: Map from node name to group UUID
13427
    @type groups: dict
13428
    @param groups: Group information
13429
    @type nodes: list
13430
    @param nodes: Node names
13431

13432
    """
13433
    result = set()
13434

    
13435
    for node in nodes:
13436
      try:
13437
        group_uuid = node2group[node]
13438
      except KeyError:
13439
        # Ignore unknown node
13440
        pass
13441
      else:
13442
        try:
13443
          group = groups[group_uuid]
13444
        except KeyError:
13445
          # Can't find group, let's use UUID
13446
          group_name = group_uuid
13447
        else:
13448
          group_name = group["name"]
13449

    
13450
        result.add(group_name)
13451

    
13452
    return sorted(result)
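  # Example (illustrative data): unknown nodes are skipped, and group UUIDs
  # without an entry in the groups dict fall back to the raw UUID:
  #
  #   _NodesToGroups({"n1": "uuid-1", "n2": "uuid-2"},
  #                  {"uuid-1": {"name": "default"}},
  #                  ["n1", "n2", "unknown"])
  #   # -> ["default", "uuid-2"]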
13453

    
13454

    
13455
class LUTestAllocator(NoHooksLU):
13456
  """Run allocator tests.
13457

13458
  This LU runs the allocator tests
13459

13460
  """
13461
  def CheckPrereq(self):
13462
    """Check prerequisites.
13463

13464
    This checks the opcode parameters depending on the direction and mode of the
    test.
13465

13466
    """
13467
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13468
      for attr in ["memory", "disks", "disk_template",
13469
                   "os", "tags", "nics", "vcpus"]:
13470
        if not hasattr(self.op, attr):
13471
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13472
                                     attr, errors.ECODE_INVAL)
13473
      iname = self.cfg.ExpandInstanceName(self.op.name)
13474
      if iname is not None:
13475
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13476
                                   iname, errors.ECODE_EXISTS)
13477
      if not isinstance(self.op.nics, list):
13478
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13479
                                   errors.ECODE_INVAL)
13480
      if not isinstance(self.op.disks, list):
13481
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13482
                                   errors.ECODE_INVAL)
13483
      for row in self.op.disks:
13484
        if (not isinstance(row, dict) or
13485
            constants.IDISK_SIZE not in row or
13486
            not isinstance(row[constants.IDISK_SIZE], int) or
13487
            constants.IDISK_MODE not in row or
13488
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13489
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13490
                                     " parameter", errors.ECODE_INVAL)
13491
      if self.op.hypervisor is None:
13492
        self.op.hypervisor = self.cfg.GetHypervisorType()
13493
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13494
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13495
      self.op.name = fname
13496
      self.relocate_from = \
13497
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13498
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13499
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13500
      if not self.op.instances:
13501
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13502
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13503
    else:
13504
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13505
                                 self.op.mode, errors.ECODE_INVAL)
13506

    
13507
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13508
      if self.op.allocator is None:
13509
        raise errors.OpPrereqError("Missing allocator name",
13510
                                   errors.ECODE_INVAL)
13511
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13512
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13513
                                 self.op.direction, errors.ECODE_INVAL)
13514

    
13515
  def Exec(self, feedback_fn):
13516
    """Run the allocator test.
13517

13518
    """
13519
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13520
      ial = IAllocator(self.cfg, self.rpc,
13521
                       mode=self.op.mode,
13522
                       name=self.op.name,
13523
                       memory=self.op.memory,
13524
                       disks=self.op.disks,
13525
                       disk_template=self.op.disk_template,
13526
                       os=self.op.os,
13527
                       tags=self.op.tags,
13528
                       nics=self.op.nics,
13529
                       vcpus=self.op.vcpus,
13530
                       hypervisor=self.op.hypervisor,
13531
                       )
13532
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13533
      ial = IAllocator(self.cfg, self.rpc,
13534
                       mode=self.op.mode,
13535
                       name=self.op.name,
13536
                       relocate_from=list(self.relocate_from),
13537
                       )
13538
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13539
      ial = IAllocator(self.cfg, self.rpc,
13540
                       mode=self.op.mode,
13541
                       instances=self.op.instances,
13542
                       target_groups=self.op.target_groups)
13543
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13544
      ial = IAllocator(self.cfg, self.rpc,
13545
                       mode=self.op.mode,
13546
                       instances=self.op.instances,
13547
                       evac_mode=self.op.evac_mode)
13548
    else:
13549
      raise errors.ProgrammerError("Uncatched mode %s in"
13550
                                   " LUTestAllocator.Exec", self.op.mode)
13551

    
13552
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13553
      result = ial.in_text
13554
    else:
13555
      ial.Run(self.op.allocator, validate=False)
13556
      result = ial.out_text
13557
    return result
13558

    
13559

    
13560
#: Query type implementations
13561
_QUERY_IMPL = {
13562
  constants.QR_INSTANCE: _InstanceQuery,
13563
  constants.QR_NODE: _NodeQuery,
13564
  constants.QR_GROUP: _GroupQuery,
13565
  constants.QR_OS: _OsQuery,
13566
  }
13567

    
13568
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13569

    
13570

    
13571
def _GetQueryImplementation(name):
13572
  """Returns the implemtnation for a query type.
13573

13574
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
13575

13576
  """
13577
  try:
13578
    return _QUERY_IMPL[name]
13579
  except KeyError:
13580
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13581
                               errors.ECODE_INVAL)
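# Example use of the lookup above (illustrative only):
#
#   impl = _GetQueryImplementation(constants.QR_GROUP)      # -> _GroupQuery
#   query = impl(qlang.MakeSimpleFilter("name", names), fields, False)
#
# Unknown resource names raise OpPrereqError with errors.ECODE_INVAL.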