#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import runtime

import ganeti.masterd.instance # pylint: disable=W0611


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
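
# Illustrative sketch (not part of the original module): an LU whose Exec
# method wants follow-up jobs submitted on its behalf can return a
# ResultWithJobs instead of a plain value, e.g.:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()]]   # one job with one opcode
#     return ResultWithJobs(jobs, warnings=[])     # extra values go to "other"
#
# The processor then submits the jobs and includes the resulting job IDs in
# the opcode result, as described in the class docstring above.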


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, filter_, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
                              cur_group_uuid):
  """Checks if node groups for locked instances are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration
  @type instances: dict; string as key, L{objects.Instance} as value
  @param instances: Dictionary, instance name as key, instance object as value
  @type owned_groups: iterable of string
  @param owned_groups: List of owned groups
  @type owned_nodes: iterable of string
  @param owned_nodes: List of owned nodes
  @type cur_group_uuid: string or None
  @param cur_group_uuid: Optional group UUID to check against instance's groups

  """
  for (name, inst) in instances.items():
    assert owned_nodes.issuperset(inst.all_nodes), \
      "Instance %s's nodes changed while we kept the lock" % name

    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)

    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
      "Instance %s has no node in group %s" % (name, cur_group_uuid)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
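
# Illustrative sketch (not part of the original module) of the merge
# semantics above, with made-up parameter names:
#
#   old = {"vcpus": 2, "memory": 512}
#   update = {"memory": constants.VALUE_DEFAULT, "nic_type": "paravirtual"}
#   _GetUpdatedParams(old, update)
#   --> {"vcpus": 2, "nic_type": "paravirtual"}
#
# Keys set to VALUE_DEFAULT (or to None, when use_none=True) are removed so
# that the cluster-level default applies again; all other keys are
# overwritten or added.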


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)
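
# Illustrative sketch (not part of the original module), with made-up names:
# for two instances whose MapLVsByNode() returns
#   inst1: {"node1": ["lv_a"], "node2": ["lv_a"]}
#   inst2: {"node1": ["lv_b"]}
# the resulting map would be
#   {("node1", "lv_a"): "inst1", ("node2", "lv_a"): "inst1",
#    ("node1", "lv_b"): "inst2"}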


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
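
# Illustrative sketch (not part of the original module): for an instance with
# one bridged NIC and one disk, the resulting hook environment contains keys
# along the lines of
#   OP_TARGET, INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_SECONDARIES,
#   INSTANCE_STATUS, INSTANCE_NIC_COUNT=1, INSTANCE_NIC0_IP,
#   INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK,
#   INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT=1, INSTANCE_DISK0_SIZE,
#   INSTANCE_DISK0_MODE, INSTANCE_TAGS
# plus one INSTANCE_BE_*/INSTANCE_HV_* entry per backend and hypervisor
# parameter; the hooks runner later prefixes everything with "GANETI_".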


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
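
# Illustrative sketch (not part of the original module): the same problem
# reported through _ErrorIf is rendered in one of two shapes, depending on
# the opcode's error_codes flag:
#
#   self._ErrorIf(True, self.ENODEHV, "node1.example.com", "hypervisor x")
#   # error_codes=False: "  - ERROR: node node1.example.com: hypervisor x"
#   # error_codes=True:  "  - ERROR:ENODEHV:node:node1.example.com:hypervisor x"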
1537

    
1538

    
1539
class LUClusterVerify(NoHooksLU):
1540
  """Submits all jobs necessary to verify the cluster.
1541

1542
  """
1543
  REQ_BGL = False
1544

    
1545
  def ExpandNames(self):
1546
    self.needed_locks = {}
1547

    
1548
  def Exec(self, feedback_fn):
1549
    jobs = []
1550

    
1551
    if self.op.group_name:
1552
      groups = [self.op.group_name]
1553
      depends_fn = lambda: None
1554
    else:
1555
      groups = self.cfg.GetNodeGroupList()
1556

    
1557
      # Verify global configuration
1558
      jobs.append([opcodes.OpClusterVerifyConfig()])
1559

    
1560
      # Always depend on global verification
1561
      depends_fn = lambda: [(-len(jobs), [])]
1562

    
1563
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1564
                                              depends=depends_fn())]
1565
                for group in groups)
1566

    
1567
    # Fix up all parameters
1568
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1569
      op.debug_simulate_errors = self.op.debug_simulate_errors
1570
      op.verbose = self.op.verbose
1571
      op.error_codes = self.op.error_codes
1572
      try:
1573
        op.skip_checks = self.op.skip_checks
1574
      except AttributeError:
1575
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1576

    
1577
    return ResultWithJobs(jobs)
1578

    
1579

    
1580
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1581
  """Verifies the cluster config.
1582

1583
  """
1584
  REQ_BGL = True
1585

    
1586
  def _VerifyHVP(self, hvp_data):
1587
    """Verifies locally the syntax of the hypervisor parameters.
1588

1589
    """
1590
    for item, hv_name, hv_params in hvp_data:
1591
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1592
             (item, hv_name))
1593
      try:
1594
        hv_class = hypervisor.GetHypervisor(hv_name)
1595
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1596
        hv_class.CheckParameterSyntax(hv_params)
1597
      except errors.GenericError, err:
1598
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1599

    
1600
  def ExpandNames(self):
1601
    # Information can be safely retrieved as the BGL is acquired in exclusive
1602
    # mode
1603
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1604
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1605
    self.all_node_info = self.cfg.GetAllNodesInfo()
1606
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1607
    self.needed_locks = {}
1608

    
1609
  def Exec(self, feedback_fn):
1610
    """Verify integrity of cluster, performing various test on nodes.
1611

1612
    """
1613
    self.bad = False
1614
    self._feedback_fn = feedback_fn
1615

    
1616
    feedback_fn("* Verifying cluster config")
1617

    
1618
    for msg in self.cfg.VerifyConfig():
1619
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1620

    
1621
    feedback_fn("* Verifying cluster certificate files")
1622

    
1623
    for cert_filename in constants.ALL_CERT_FILES:
1624
      (errcode, msg) = _VerifyCertificate(cert_filename)
1625
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1626

    
1627
    feedback_fn("* Verifying hypervisor parameters")
1628

    
1629
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1630
                                                self.all_inst_info.values()))
1631

    
1632
    feedback_fn("* Verifying all nodes belong to an existing group")
1633

    
1634
    # We do this verification here because, should this bogus circumstance
1635
    # occur, it would never be caught by VerifyGroup, which only acts on
1636
    # nodes/instances reachable from existing node groups.
1637

    
1638
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1639
                         if node.group not in self.all_group_info)
1640

    
1641
    dangling_instances = {}
1642
    no_node_instances = []
1643

    
1644
    for inst in self.all_inst_info.values():
1645
      if inst.primary_node in dangling_nodes:
1646
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1647
      elif inst.primary_node not in self.all_node_info:
1648
        no_node_instances.append(inst.name)
1649

    
1650
    pretty_dangling = [
1651
        "%s (%s)" %
1652
        (node.name,
1653
         utils.CommaJoin(dangling_instances.get(node.name,
1654
                                                ["no instances"])))
1655
        for node in dangling_nodes]
1656

    
1657
    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1658
                  "the following nodes (and their instances) belong to a non"
1659
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1660

    
1661
    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1662
                  "the following instances have a non-existing primary-node:"
1663
                  " %s", utils.CommaJoin(no_node_instances))
1664

    
1665
    return not self.bad
1666

    
1667

    
1668
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1669
  """Verifies the status of a node group.
1670

1671
  """
1672
  HPATH = "cluster-verify"
1673
  HTYPE = constants.HTYPE_CLUSTER
1674
  REQ_BGL = False
1675

    
1676
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1677

    
1678
  class NodeImage(object):
1679
    """A class representing the logical and physical status of a node.
1680

1681
    @type name: string
1682
    @ivar name: the node name to which this object refers
1683
    @ivar volumes: a structure as returned from
1684
        L{ganeti.backend.GetVolumeList} (runtime)
1685
    @ivar instances: a list of running instances (runtime)
1686
    @ivar pinst: list of configured primary instances (config)
1687
    @ivar sinst: list of configured secondary instances (config)
1688
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1689
        instances for which this node is secondary (config)
1690
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1691
    @ivar dfree: free disk, as reported by the node (runtime)
1692
    @ivar offline: the offline status (config)
1693
    @type rpc_fail: boolean
1694
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1695
        not whether the individual keys were correct) (runtime)
1696
    @type lvm_fail: boolean
1697
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1698
    @type hyp_fail: boolean
1699
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1700
    @type ghost: boolean
1701
    @ivar ghost: whether this is a known node or not (config)
1702
    @type os_fail: boolean
1703
    @ivar os_fail: whether the RPC call didn't return valid OS data
1704
    @type oslist: list
1705
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1706
    @type vm_capable: boolean
1707
    @ivar vm_capable: whether the node can host instances
1708

1709
    """
1710
    def __init__(self, offline=False, name=None, vm_capable=True):
1711
      self.name = name
1712
      self.volumes = {}
1713
      self.instances = []
1714
      self.pinst = []
1715
      self.sinst = []
1716
      self.sbp = {}
1717
      self.mfree = 0
1718
      self.dfree = 0
1719
      self.offline = offline
1720
      self.vm_capable = vm_capable
1721
      self.rpc_fail = False
1722
      self.lvm_fail = False
1723
      self.hyp_fail = False
1724
      self.ghost = False
1725
      self.os_fail = False
1726
      self.oslist = {}
1727

    
1728
  def ExpandNames(self):
1729
    # This raises errors.OpPrereqError on its own:
1730
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1731

    
1732
    # Get instances in node group; this is unsafe and needs verification later
1733
    inst_names = \
1734
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1735

    
1736
    self.needed_locks = {
1737
      locking.LEVEL_INSTANCE: inst_names,
1738
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1739
      locking.LEVEL_NODE: [],
1740
      }
1741

    
1742
    self.share_locks = _ShareAll()
1743

    
1744
  def DeclareLocks(self, level):
1745
    if level == locking.LEVEL_NODE:
1746
      # Get members of node group; this is unsafe and needs verification later
1747
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1748

    
1749
      all_inst_info = self.cfg.GetAllInstancesInfo()
1750

    
1751
      # In Exec(), we warn about mirrored instances that have primary and
1752
      # secondary living in separate node groups. To fully verify that
1753
      # volumes for these instances are healthy, we will need to do an
1754
      # extra call to their secondaries. We ensure here those nodes will
1755
      # be locked.
1756
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1757
        # Important: access only the instances whose lock is owned
1758
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1759
          nodes.update(all_inst_info[inst].secondary_nodes)
1760

    
1761
      self.needed_locks[locking.LEVEL_NODE] = nodes
1762

    
1763
  def CheckPrereq(self):
1764
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1765
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1766

    
1767
    group_nodes = set(self.group_info.members)
1768
    group_instances = \
1769
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)
1770

    
1771
    unlocked_nodes = \
1772
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1773

    
1774
    unlocked_instances = \
1775
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1776

    
1777
    if unlocked_nodes:
1778
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1779
                                 utils.CommaJoin(unlocked_nodes),
1780
                                 errors.ECODE_STATE)
1781

    
1782
    if unlocked_instances:
1783
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1784
                                 utils.CommaJoin(unlocked_instances),
1785
                                 errors.ECODE_STATE)
1786

    
1787
    self.all_node_info = self.cfg.GetAllNodesInfo()
1788
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1789

    
1790
    self.my_node_names = utils.NiceSort(group_nodes)
1791
    self.my_inst_names = utils.NiceSort(group_instances)
1792

    
1793
    self.my_node_info = dict((name, self.all_node_info[name])
1794
                             for name in self.my_node_names)
1795

    
1796
    self.my_inst_info = dict((name, self.all_inst_info[name])
1797
                             for name in self.my_inst_names)
1798

    
1799
    # We detect here the nodes that will need the extra RPC calls for verifying
1800
    # split LV volumes; they should be locked.
1801
    extra_lv_nodes = set()
1802

    
1803
    for inst in self.my_inst_info.values():
1804
      if inst.disk_template in constants.DTS_INT_MIRROR:
1805
        for nname in inst.all_nodes:
1806
          if self.all_node_info[nname].group != self.group_uuid:
1807
            extra_lv_nodes.add(nname)
1808

    
1809
    unlocked_lv_nodes = \
1810
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1811

    
1812
    if unlocked_lv_nodes:
1813
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
1814
                                 utils.CommaJoin(unlocked_lv_nodes),
1815
                                 errors.ECODE_STATE)
1816
    self.extra_lv_nodes = list(extra_lv_nodes)
1817

    
1818
  def _VerifyNode(self, ninfo, nresult):
1819
    """Perform some basic validation on data returned from a node.
1820

1821
      - check the result data structure is well formed and has all the
1822
        mandatory fields
1823
      - check ganeti version
1824

1825
    @type ninfo: L{objects.Node}
1826
    @param ninfo: the node to check
1827
    @param nresult: the results from the node
1828
    @rtype: boolean
1829
    @return: whether overall this call was successful (and we can expect
1830
         reasonable values in the respose)
1831

1832
    """
1833
    node = ninfo.name
1834
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1835

    
1836
    # main result, nresult should be a non-empty dict
1837
    test = not nresult or not isinstance(nresult, dict)
1838
    _ErrorIf(test, self.ENODERPC, node,
1839
                  "unable to verify node: no data returned")
1840
    if test:
1841
      return False
1842

    
1843
    # compares ganeti version
1844
    local_version = constants.PROTOCOL_VERSION
1845
    remote_version = nresult.get("version", None)
1846
    test = not (remote_version and
1847
                isinstance(remote_version, (list, tuple)) and
1848
                len(remote_version) == 2)
1849
    _ErrorIf(test, self.ENODERPC, node,
1850
             "connection to node returned invalid data")
1851
    if test:
1852
      return False
1853

    
1854
    test = local_version != remote_version[0]
1855
    _ErrorIf(test, self.ENODEVERSION, node,
1856
             "incompatible protocol versions: master %s,"
1857
             " node %s", local_version, remote_version[0])
1858
    if test:
1859
      return False
1860

    
1861
    # node seems compatible, we can actually try to look into its results
1862

    
1863
    # full package version
1864
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1865
                  self.ENODEVERSION, node,
1866
                  "software version mismatch: master %s, node %s",
1867
                  constants.RELEASE_VERSION, remote_version[1],
1868
                  code=self.ETYPE_WARNING)
1869

    
1870
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1871
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1872
      for hv_name, hv_result in hyp_result.iteritems():
1873
        test = hv_result is not None
1874
        _ErrorIf(test, self.ENODEHV, node,
1875
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1876

    
1877
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1878
    if ninfo.vm_capable and isinstance(hvp_result, list):
1879
      for item, hv_name, hv_result in hvp_result:
1880
        _ErrorIf(True, self.ENODEHV, node,
1881
                 "hypervisor %s parameter verify failure (source %s): %s",
1882
                 hv_name, item, hv_result)
1883

    
1884
    test = nresult.get(constants.NV_NODESETUP,
1885
                       ["Missing NODESETUP results"])
1886
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1887
             "; ".join(test))
1888

    
1889
    return True
1890

    
1891
  def _VerifyNodeTime(self, ninfo, nresult,
1892
                      nvinfo_starttime, nvinfo_endtime):
1893
    """Check the node time.
1894

1895
    @type ninfo: L{objects.Node}
1896
    @param ninfo: the node to check
1897
    @param nresult: the remote results for the node
1898
    @param nvinfo_starttime: the start time of the RPC call
1899
    @param nvinfo_endtime: the end time of the RPC call
1900

1901
    """
1902
    node = ninfo.name
1903
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1904

    
1905
    ntime = nresult.get(constants.NV_TIME, None)
1906
    try:
1907
      ntime_merged = utils.MergeTime(ntime)
1908
    except (ValueError, TypeError):
1909
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1910
      return
1911

    
1912
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1913
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1914
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1915
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1916
    else:
1917
      ntime_diff = None
1918

    
1919
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1920
             "Node time diverges by at least %s from master node time",
1921
             ntime_diff)
1922

    
1923
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1924
    """Check the node LVM results.
1925

1926
    @type ninfo: L{objects.Node}
1927
    @param ninfo: the node to check
1928
    @param nresult: the remote results for the node
1929
    @param vg_name: the configured VG name
1930

1931
    """
1932
    if vg_name is None:
1933
      return
1934

    
1935
    node = ninfo.name
1936
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1937

    
1938
    # checks vg existence and size > 20G
1939
    vglist = nresult.get(constants.NV_VGLIST, None)
1940
    test = not vglist
1941
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1942
    if not test:
1943
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1944
                                            constants.MIN_VG_SIZE)
1945
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1946

    
1947
    # check pv names
1948
    pvlist = nresult.get(constants.NV_PVLIST, None)
1949
    test = pvlist is None
1950
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1951
    if not test:
1952
      # check that ':' is not present in PV names, since it's a
1953
      # special character for lvcreate (denotes the range of PEs to
1954
      # use on the PV)
1955
      for _, pvname, owner_vg in pvlist:
1956
        test = ":" in pvname
1957
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1958
                 " '%s' of VG '%s'", pvname, owner_vg)
1959

    
1960
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1961
    """Check the node bridges.
1962

1963
    @type ninfo: L{objects.Node}
1964
    @param ninfo: the node to check
1965
    @param nresult: the remote results for the node
1966
    @param bridges: the expected list of bridges
1967

1968
    """
1969
    if not bridges:
1970
      return
1971

    
1972
    node = ninfo.name
1973
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1974

    
1975
    missing = nresult.get(constants.NV_BRIDGES, None)
1976
    test = not isinstance(missing, list)
1977
    _ErrorIf(test, self.ENODENET, node,
1978
             "did not return valid bridge information")
1979
    if not test:
1980
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1981
               utils.CommaJoin(sorted(missing)))
1982

    
1983
  def _VerifyNodeNetwork(self, ninfo, nresult):
1984
    """Check the node network connectivity results.
1985

1986
    @type ninfo: L{objects.Node}
1987
    @param ninfo: the node to check
1988
    @param nresult: the remote results for the node
1989

1990
    """
1991
    node = ninfo.name
1992
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1993

    
1994
    test = constants.NV_NODELIST not in nresult
1995
    _ErrorIf(test, self.ENODESSH, node,
1996
             "node hasn't returned node ssh connectivity data")
1997
    if not test:
1998
      if nresult[constants.NV_NODELIST]:
1999
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2000
          _ErrorIf(True, self.ENODESSH, node,
2001
                   "ssh communication with node '%s': %s", a_node, a_msg)
2002

    
2003
    test = constants.NV_NODENETTEST not in nresult
2004
    _ErrorIf(test, self.ENODENET, node,
2005
             "node hasn't returned node tcp connectivity data")
2006
    if not test:
2007
      if nresult[constants.NV_NODENETTEST]:
2008
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2009
        for anode in nlist:
2010
          _ErrorIf(True, self.ENODENET, node,
2011
                   "tcp communication with node '%s': %s",
2012
                   anode, nresult[constants.NV_NODENETTEST][anode])
2013

    
2014
    test = constants.NV_MASTERIP not in nresult
2015
    _ErrorIf(test, self.ENODENET, node,
2016
             "node hasn't returned node master IP reachability data")
2017
    if not test:
2018
      if not nresult[constants.NV_MASTERIP]:
2019
        if node == self.master_node:
2020
          msg = "the master node cannot reach the master IP (not configured?)"
2021
        else:
2022
          msg = "cannot reach the master IP"
2023
        _ErrorIf(True, self.ENODENET, node, msg)
2024

    
2025
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2026
                      diskstatus):
2027
    """Verify an instance.
2028

2029
    This function checks to see if the required block devices are
2030
    available on the instance's node.
2031

2032
    """
2033
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2034
    node_current = instanceconfig.primary_node
2035

    
2036
    node_vol_should = {}
2037
    instanceconfig.MapLVsByNode(node_vol_should)
2038

    
2039
    for node in node_vol_should:
2040
      n_img = node_image[node]
2041
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2042
        # ignore missing volumes on offline or broken nodes
2043
        continue
2044
      for volume in node_vol_should[node]:
2045
        test = volume not in n_img.volumes
2046
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2047
                 "volume %s missing on node %s", volume, node)
2048

    
2049
    if instanceconfig.admin_up:
2050
      pri_img = node_image[node_current]
2051
      test = instance not in pri_img.instances and not pri_img.offline
2052
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
2053
               "instance not running on its primary node %s",
2054
               node_current)
2055

    
2056
    diskdata = [(nname, success, status, idx)
2057
                for (nname, disks) in diskstatus.items()
2058
                for idx, (success, status) in enumerate(disks)]
2059

    
2060
    for nname, success, bdev_status, idx in diskdata:
2061
      # the 'ghost node' construction in Exec() ensures that we have a
2062
      # node here
2063
      snode = node_image[nname]
2064
      bad_snode = snode.ghost or snode.offline
2065
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2066
               self.EINSTANCEFAULTYDISK, instance,
2067
               "couldn't retrieve status for disk/%s on %s: %s",
2068
               idx, nname, bdev_status)
2069
      _ErrorIf((instanceconfig.admin_up and success and
2070
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2071
               self.EINSTANCEFAULTYDISK, instance,
2072
               "disk/%s on %s is faulty", idx, nname)
2073

    
2074
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2075
    """Verify if there are any unknown volumes in the cluster.
2076

2077
    The .os, .swap and backup volumes are ignored. All other volumes are
2078
    reported as unknown.
2079

2080
    @type reserved: L{ganeti.utils.FieldSet}
2081
    @param reserved: a FieldSet of reserved volume names
2082

2083
    """
2084
    for node, n_img in node_image.items():
2085
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2086
          self.all_node_info[node].group != self.group_uuid):
2087
        # skip non-healthy nodes
2088
        continue
2089
      for volume in n_img.volumes:
2090
        test = ((node not in node_vol_should or
2091
                volume not in node_vol_should[node]) and
2092
                not reserved.Matches(volume))
2093
        self._ErrorIf(test, self.ENODEORPHANLV, node,
2094
                      "volume %s is unknown", volume)
2095

    
2096
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2097
    """Verify N+1 Memory Resilience.
2098

2099
    Check that if one single node dies we can still start all the
2100
    instances it was primary for.
2101

2102
    """
2103
    cluster_info = self.cfg.GetClusterInfo()
2104
    for node, n_img in node_image.items():
2105
      # This code checks that every node which is now listed as
2106
      # secondary has enough memory to host all instances it is
2107
      # supposed to should a single other node in the cluster fail.
2108
      # FIXME: not ready for failover to an arbitrary node
2109
      # FIXME: does not support file-backed instances
2110
      # WARNING: we currently take into account down instances as well
2111
      # as up ones, considering that even if they're down someone
2112
      # might want to start them even in the event of a node failure.
2113
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2114
        # we're skipping nodes marked offline and nodes in other groups from
2115
        # the N+1 warning, since most likely we don't have good memory
2116
        # infromation from them; we already list instances living on such
2117
        # nodes, and that's enough warning
2118
        continue
2119
      for prinode, instances in n_img.sbp.items():
2120
        needed_mem = 0
2121
        for instance in instances:
2122
          bep = cluster_info.FillBE(instance_cfg[instance])
2123
          if bep[constants.BE_AUTO_BALANCE]:
2124
            needed_mem += bep[constants.BE_MEMORY]
2125
        test = n_img.mfree < needed_mem
2126
        self._ErrorIf(test, self.ENODEN1, node,
2127
                      "not enough memory to accomodate instance failovers"
2128
                      " should node %s fail (%dMiB needed, %dMiB available)",
2129
                      prinode, needed_mem, n_img.mfree)
2130

    
2131
  @classmethod
2132
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2133
                   (files_all, files_opt, files_mc, files_vm)):
2134
    """Verifies file checksums collected from all nodes.
2135

2136
    @param errorif: Callback for reporting errors
2137
    @param nodeinfo: List of L{objects.Node} objects
2138
    @param master_node: Name of master node
2139
    @param all_nvinfo: RPC results
2140

2141
    """
2142
    # Define functions determining which nodes to consider for a file
2143
    files2nodefn = [
2144
      (files_all, None),
2145
      (files_mc, lambda node: (node.master_candidate or
2146
                               node.name == master_node)),
2147
      (files_vm, lambda node: node.vm_capable),
2148
      ]
2149

    
2150
    # Build mapping from filename to list of nodes which should have the file
2151
    nodefiles = {}
2152
    for (files, fn) in files2nodefn:
2153
      if fn is None:
2154
        filenodes = nodeinfo
2155
      else:
2156
        filenodes = filter(fn, nodeinfo)
2157
      nodefiles.update((filename,
2158
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2159
                       for filename in files)
2160

    
2161
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2162

    
2163
    fileinfo = dict((filename, {}) for filename in nodefiles)
2164
    ignore_nodes = set()
2165

    
2166
    for node in nodeinfo:
2167
      if node.offline:
2168
        ignore_nodes.add(node.name)
2169
        continue
2170

    
2171
      nresult = all_nvinfo[node.name]
2172

    
2173
      if nresult.fail_msg or not nresult.payload:
2174
        node_files = None
2175
      else:
2176
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2177

    
2178
      test = not (node_files and isinstance(node_files, dict))
2179
      errorif(test, cls.ENODEFILECHECK, node.name,
2180
              "Node did not return file checksum data")
2181
      if test:
2182
        ignore_nodes.add(node.name)
2183
        continue
2184

    
2185
      # Build per-checksum mapping from filename to nodes having it
2186
      for (filename, checksum) in node_files.items():
2187
        assert filename in nodefiles
2188
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2189

    
2190
    for (filename, checksums) in fileinfo.items():
2191
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2192

    
2193
      # Nodes having the file
2194
      with_file = frozenset(node_name
2195
                            for nodes in fileinfo[filename].values()
2196
                            for node_name in nodes) - ignore_nodes
2197

    
2198
      expected_nodes = nodefiles[filename] - ignore_nodes
2199

    
2200
      # Nodes missing file
2201
      missing_file = expected_nodes - with_file
2202

    
2203
      if filename in files_opt:
2204
        # All or no nodes
2205
        errorif(missing_file and missing_file != expected_nodes,
2206
                cls.ECLUSTERFILECHECK, None,
2207
                "File %s is optional, but it must exist on all or no"
2208
                " nodes (not found on %s)",
2209
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2210
      else:
2211
        # Non-optional files
2212
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2213
                "File %s is missing from node(s) %s", filename,
2214
                utils.CommaJoin(utils.NiceSort(missing_file)))
2215

    
2216
        # Warn if a node has a file it shouldn't
2217
        unexpected = with_file - expected_nodes
2218
        errorif(unexpected,
2219
                cls.ECLUSTERFILECHECK, None,
2220
                "File %s should not exist on node(s) %s",
2221
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2222

    
2223
      # See if there are multiple versions of the file
2224
      test = len(checksums) > 1
2225
      if test:
2226
        variants = ["variant %s on %s" %
2227
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2228
                    for (idx, (checksum, nodes)) in
2229
                      enumerate(sorted(checksums.items()))]
2230
      else:
2231
        variants = []
2232

    
2233
      errorif(test, cls.ECLUSTERFILECHECK, None,
2234
              "File %s found with %s different checksums (%s)",
2235
              filename, len(checksums), "; ".join(variants))
2236

    
2237
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2238
                      drbd_map):
2239
    """Verifies and the node DRBD status.
2240

2241
    @type ninfo: L{objects.Node}
2242
    @param ninfo: the node to check
2243
    @param nresult: the remote results for the node
2244
    @param instanceinfo: the dict of instances
2245
    @param drbd_helper: the configured DRBD usermode helper
2246
    @param drbd_map: the DRBD map as returned by
2247
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2248

2249
    """
2250
    node = ninfo.name
2251
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2252

    
2253
    if drbd_helper:
2254
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2255
      test = (helper_result == None)
2256
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2257
               "no drbd usermode helper returned")
2258
      if helper_result:
2259
        status, payload = helper_result
2260
        test = not status
2261
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2262
                 "drbd usermode helper check unsuccessful: %s", payload)
2263
        test = status and (payload != drbd_helper)
2264
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2265
                 "wrong drbd usermode helper: %s", payload)
2266

    
2267
    # compute the DRBD minors
2268
    node_drbd = {}
2269
    for minor, instance in drbd_map[node].items():
2270
      test = instance not in instanceinfo
2271
      _ErrorIf(test, self.ECLUSTERCFG, None,
2272
               "ghost instance '%s' in temporary DRBD map", instance)
2273
        # ghost instance should not be running, but otherwise we
2274
        # don't give double warnings (both ghost instance and
2275
        # unallocated minor in use)
2276
      if test:
2277
        node_drbd[minor] = (instance, False)
2278
      else:
2279
        instance = instanceinfo[instance]
2280
        node_drbd[minor] = (instance.name, instance.admin_up)
2281

    
2282
    # and now check them
2283
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2284
    test = not isinstance(used_minors, (tuple, list))
2285
    _ErrorIf(test, self.ENODEDRBD, node,
2286
             "cannot parse drbd status file: %s", str(used_minors))
2287
    if test:
2288
      # we cannot check drbd status
2289
      return
2290

    
2291
    for minor, (iname, must_exist) in node_drbd.items():
2292
      test = minor not in used_minors and must_exist
2293
      _ErrorIf(test, self.ENODEDRBD, node,
2294
               "drbd minor %d of instance %s is not active", minor, iname)
2295
    for minor in used_minors:
2296
      test = minor not in node_drbd
2297
      _ErrorIf(test, self.ENODEDRBD, node,
2298
               "unallocated drbd minor %d is in use", minor)
2299

    
2300
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2301
    """Builds the node OS structures.
2302

2303
    @type ninfo: L{objects.Node}
2304
    @param ninfo: the node to check
2305
    @param nresult: the remote results for the node
2306
    @param nimg: the node image object
2307

2308
    """
2309
    node = ninfo.name
2310
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2311

    
2312
    remote_os = nresult.get(constants.NV_OSLIST, None)
2313
    test = (not isinstance(remote_os, list) or
2314
            not compat.all(isinstance(v, list) and len(v) == 7
2315
                           for v in remote_os))
2316

    
2317
    _ErrorIf(test, self.ENODEOS, node,
2318
             "node hasn't returned valid OS data")
2319

    
2320
    nimg.os_fail = test
2321

    
2322
    if test:
2323
      return
2324

    
2325
    os_dict = {}
2326

    
2327
    for (name, os_path, status, diagnose,
2328
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2329

    
2330
      if name not in os_dict:
2331
        os_dict[name] = []
2332

    
2333
      # parameters is a list of lists instead of list of tuples due to
2334
      # JSON lacking a real tuple type, fix it:
2335
      parameters = [tuple(v) for v in parameters]
2336
      os_dict[name].append((os_path, status, diagnose,
2337
                            set(variants), set(parameters), set(api_ver)))
2338

    
2339
    nimg.oslist = os_dict
2340

    
2341
  def _VerifyNodeOS(self, ninfo, nimg, base):
2342
    """Verifies the node OS list.
2343

2344
    @type ninfo: L{objects.Node}
2345
    @param ninfo: the node to check
2346
    @param nimg: the node image object
2347
    @param base: the 'template' node we match against (e.g. from the master)
2348

2349
    """
2350
    node = ninfo.name
2351
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2352

    
2353
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2354

    
2355
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2356
    for os_name, os_data in nimg.oslist.items():
2357
      assert os_data, "Empty OS status for OS %s?!" % os_name
2358
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2359
      _ErrorIf(not f_status, self.ENODEOS, node,
2360
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2361
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2362
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2363
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2364
      # comparisons with the 'base' image
2365
      test = os_name not in base.oslist
2366
      _ErrorIf(test, self.ENODEOS, node,
2367
               "Extra OS %s not present on reference node (%s)",
2368
               os_name, base.name)
2369
      if test:
2370
        continue
2371
      assert base.oslist[os_name], "Base node has empty OS status?"
2372
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2373
      if not b_status:
2374
        # base OS is invalid, skipping
2375
        continue
2376
      for kind, a, b in [("API version", f_api, b_api),
2377
                         ("variants list", f_var, b_var),
2378
                         ("parameters", beautify_params(f_param),
2379
                          beautify_params(b_param))]:
2380
        _ErrorIf(a != b, self.ENODEOS, node,
2381
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2382
                 kind, os_name, base.name,
2383
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2384

    
2385
    # check any missing OSes
2386
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2387
    _ErrorIf(missing, self.ENODEOS, node,
2388
             "OSes present on reference node %s but missing on this node: %s",
2389
             base.name, utils.CommaJoin(missing))
2390

    
2391
  def _VerifyOob(self, ninfo, nresult):
2392
    """Verifies out of band functionality of a node.
2393

2394
    @type ninfo: L{objects.Node}
2395
    @param ninfo: the node to check
2396
    @param nresult: the remote results for the node
2397

2398
    """
2399
    node = ninfo.name
2400
    # We just have to verify the paths on master and/or master candidates
2401
    # as the oob helper is invoked on the master
2402
    if ((ninfo.master_candidate or ninfo.master_capable) and
2403
        constants.NV_OOB_PATHS in nresult):
2404
      for path_result in nresult[constants.NV_OOB_PATHS]:
2405
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2406

    
2407
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2408
    """Verifies and updates the node volume data.
2409

2410
    This function will update a L{NodeImage}'s internal structures
2411
    with data from the remote call.
2412

2413
    @type ninfo: L{objects.Node}
2414
    @param ninfo: the node to check
2415
    @param nresult: the remote results for the node
2416
    @param nimg: the node image object
2417
    @param vg_name: the configured VG name
2418

2419
    """
2420
    node = ninfo.name
2421
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2422

    
2423
    nimg.lvm_fail = True
2424
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2425
    if vg_name is None:
2426
      pass
2427
    elif isinstance(lvdata, basestring):
2428
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2429
               utils.SafeEncode(lvdata))
2430
    elif not isinstance(lvdata, dict):
2431
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2432
    else:
2433
      nimg.volumes = lvdata
2434
      nimg.lvm_fail = False
2435

    
2436
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2437
    """Verifies and updates the node instance list.
2438

2439
    If the listing was successful, then updates this node's instance
2440
    list. Otherwise, it marks the RPC call as failed for the instance
2441
    list key.
2442

2443
    @type ninfo: L{objects.Node}
2444
    @param ninfo: the node to check
2445
    @param nresult: the remote results for the node
2446
    @param nimg: the node image object
2447

2448
    """
2449
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2450
    test = not isinstance(idata, list)
2451
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2452
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2453
    if test:
2454
      nimg.hyp_fail = True
2455
    else:
2456
      nimg.instances = idata
2457

    
2458
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2459
    """Verifies and computes a node information map
2460

2461
    @type ninfo: L{objects.Node}
2462
    @param ninfo: the node to check
2463
    @param nresult: the remote results for the node
2464
    @param nimg: the node image object
2465
    @param vg_name: the configured VG name
2466

2467
    """
2468
    node = ninfo.name
2469
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2470

    
2471
    # try to read free memory (from the hypervisor)
2472
    hv_info = nresult.get(constants.NV_HVINFO, None)
2473
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2474
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2475
    if not test:
2476
      try:
2477
        nimg.mfree = int(hv_info["memory_free"])
2478
      except (ValueError, TypeError):
2479
        _ErrorIf(True, self.ENODERPC, node,
2480
                 "node returned invalid nodeinfo, check hypervisor")
2481

    
2482
    # FIXME: devise a free space model for file based instances as well
2483
    if vg_name is not None:
2484
      test = (constants.NV_VGLIST not in nresult or
2485
              vg_name not in nresult[constants.NV_VGLIST])
2486
      _ErrorIf(test, self.ENODELVM, node,
2487
               "node didn't return data for the volume group '%s'"
2488
               " - it is either missing or broken", vg_name)
2489
      if not test:
2490
        try:
2491
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2492
        except (ValueError, TypeError):
2493
          _ErrorIf(True, self.ENODERPC, node,
2494
                   "node returned invalid LVM info, check LVM status")
2495

    
2496
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2497
    """Gets per-disk status information for all instances.
2498

2499
    @type nodelist: list of strings
2500
    @param nodelist: Node names
2501
    @type node_image: dict of (name, L{objects.Node})
2502
    @param node_image: Node objects
2503
    @type instanceinfo: dict of (name, L{objects.Instance})
2504
    @param instanceinfo: Instance objects
2505
    @rtype: {instance: {node: [(succes, payload)]}}
2506
    @return: a dictionary of per-instance dictionaries with nodes as
2507
        keys and disk information as values; the disk information is a
2508
        list of tuples (success, payload)
2509

2510
    """
2511
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2512

    
2513
    node_disks = {}
2514
    node_disks_devonly = {}
2515
    diskless_instances = set()
2516
    diskless = constants.DT_DISKLESS
2517

    
2518
    for nname in nodelist:
2519
      node_instances = list(itertools.chain(node_image[nname].pinst,
2520
                                            node_image[nname].sinst))
2521
      diskless_instances.update(inst for inst in node_instances
2522
                                if instanceinfo[inst].disk_template == diskless)
2523
      disks = [(inst, disk)
2524
               for inst in node_instances
2525
               for disk in instanceinfo[inst].disks]
2526

    
2527
      if not disks:
2528
        # No need to collect data
2529
        continue
2530

    
2531
      node_disks[nname] = disks
2532

    
2533
      # Creating copies as SetDiskID below will modify the objects and that can
2534
      # lead to incorrect data returned from nodes
2535
      devonly = [dev.Copy() for (_, dev) in disks]
2536

    
2537
      for dev in devonly:
2538
        self.cfg.SetDiskID(dev, nname)
2539

    
2540
      node_disks_devonly[nname] = devonly
2541

    
2542
    assert len(node_disks) == len(node_disks_devonly)
2543

    
2544
    # Collect data from all nodes with disks
2545
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2546
                                                          node_disks_devonly)
2547

    
2548
    assert len(result) == len(node_disks)
2549

    
2550
    instdisk = {}
2551

    
2552
    for (nname, nres) in result.items():
2553
      disks = node_disks[nname]
2554

    
2555
      if nres.offline:
2556
        # No data from this node
2557
        data = len(disks) * [(False, "node offline")]
2558
      else:
2559
        msg = nres.fail_msg
2560
        _ErrorIf(msg, self.ENODERPC, nname,
2561
                 "while getting disk information: %s", msg)
2562
        if msg:
2563
          # No data from this node
2564
          data = len(disks) * [(False, msg)]
2565
        else:
2566
          data = []
2567
          for idx, i in enumerate(nres.payload):
2568
            if isinstance(i, (tuple, list)) and len(i) == 2:
2569
              data.append(i)
2570
            else:
2571
              logging.warning("Invalid result from node %s, entry %d: %s",
2572
                              nname, idx, i)
2573
              data.append((False, "Invalid result from the remote node"))
2574

    
2575
      for ((inst, _), status) in zip(disks, data):
2576
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2577

    
2578
    # Add empty entries for diskless instances.
2579
    for inst in diskless_instances:
2580
      assert inst not in instdisk
2581
      instdisk[inst] = {}
2582

    
2583
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2584
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2585
                      compat.all(isinstance(s, (tuple, list)) and
2586
                                 len(s) == 2 for s in statuses)
2587
                      for inst, nnames in instdisk.items()
2588
                      for nname, statuses in nnames.items())
2589
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2590

    
2591
    return instdisk
2592

    
2593
  @staticmethod
2594
  def _SshNodeSelector(group_uuid, all_nodes):
2595
    """Create endless iterators for all potential SSH check hosts.
2596

2597
    """
2598
    nodes = [node for node in all_nodes
2599
             if (node.group != group_uuid and
2600
                 not node.offline)]
2601
    keyfunc = operator.attrgetter("group")
2602

    
2603
    return map(itertools.cycle,
2604
               [sorted(map(operator.attrgetter("name"), names))
2605
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2606
                                                  keyfunc)])
2607

    
2608
  @classmethod
2609
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2610
    """Choose which nodes should talk to which other nodes.
2611

2612
    We will make nodes contact all nodes in their group, and one node from
2613
    every other group.
2614

2615
    @warning: This algorithm has a known issue if one node group is much
2616
      smaller than others (e.g. just one node). In such a case all other
2617
      nodes will talk to the single node.
2618

2619
    """
2620
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2621
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2622

    
2623
    return (online_nodes,
2624
            dict((name, sorted([i.next() for i in sel]))
2625
                 for name in online_nodes))
2626

    
2627
  def BuildHooksEnv(self):
2628
    """Build hooks env.
2629

2630
    Cluster-Verify hooks just ran in the post phase and their failure makes
2631
    the output be logged in the verify output and the verification to fail.
2632

2633
    """
2634
    env = {
2635
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2636
      }
2637

    
2638
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2639
               for node in self.my_node_info.values())
2640

    
2641
    return env
2642

    
2643
  def BuildHooksNodes(self):
2644
    """Build hooks nodes.
2645

2646
    """
2647
    return ([], self.my_node_names)
2648

    
2649
  def Exec(self, feedback_fn):
2650
    """Verify integrity of the node group, performing various test on nodes.
2651

2652
    """
2653
    # This method has too many local variables. pylint: disable=R0914
2654
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2655

    
2656
    if not self.my_node_names:
2657
      # empty node group
2658
      feedback_fn("* Empty node group, skipping verification")
2659
      return True
2660

    
2661
    self.bad = False
2662
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2663
    verbose = self.op.verbose
2664
    self._feedback_fn = feedback_fn
2665

    
2666
    vg_name = self.cfg.GetVGName()
2667
    drbd_helper = self.cfg.GetDRBDHelper()
2668
    cluster = self.cfg.GetClusterInfo()
2669
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2670
    hypervisors = cluster.enabled_hypervisors
2671
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2672

    
2673
    i_non_redundant = [] # Non redundant instances
2674
    i_non_a_balanced = [] # Non auto-balanced instances
2675
    n_offline = 0 # Count of offline nodes
2676
    n_drained = 0 # Count of nodes being drained
2677
    node_vol_should = {}
2678

    
2679
    # FIXME: verify OS list
2680

    
2681
    # File verification
2682
    filemap = _ComputeAncillaryFiles(cluster, False)
2683

    
2684
    # do local checksums
2685
    master_node = self.master_node = self.cfg.GetMasterNode()
2686
    master_ip = self.cfg.GetMasterIP()
2687

    
2688
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2689

    
2690
    node_verify_param = {
2691
      constants.NV_FILELIST:
2692
        utils.UniqueSequence(filename
2693
                             for files in filemap
2694
                             for filename in files),
2695
      constants.NV_NODELIST:
2696
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2697
                                  self.all_node_info.values()),
2698
      constants.NV_HYPERVISOR: hypervisors,
2699
      constants.NV_HVPARAMS:
2700
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2701
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2702
                                 for node in node_data_list
2703
                                 if not node.offline],
2704
      constants.NV_INSTANCELIST: hypervisors,
2705
      constants.NV_VERSION: None,
2706
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2707
      constants.NV_NODESETUP: None,
2708
      constants.NV_TIME: None,
2709
      constants.NV_MASTERIP: (master_node, master_ip),
2710
      constants.NV_OSLIST: None,
2711
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2712
      }
2713

    
2714
    if vg_name is not None:
2715
      node_verify_param[constants.NV_VGLIST] = None
2716
      node_verify_param[constants.NV_LVLIST] = vg_name
2717
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2718
      node_verify_param[constants.NV_DRBDLIST] = None
2719

    
2720
    if drbd_helper:
2721
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2722

    
2723
    # bridge checks
2724
    # FIXME: this needs to be changed per node-group, not cluster-wide
2725
    bridges = set()
2726
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2727
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2728
      bridges.add(default_nicpp[constants.NIC_LINK])
2729
    for instance in self.my_inst_info.values():
2730
      for nic in instance.nics:
2731
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2732
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2733
          bridges.add(full_nic[constants.NIC_LINK])
2734

    
2735
    if bridges:
2736
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2737

    
2738
    # Build our expected cluster state
2739
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2740
                                                 name=node.name,
2741
                                                 vm_capable=node.vm_capable))
2742
                      for node in node_data_list)
2743

    
2744
    # Gather OOB paths
2745
    oob_paths = []
2746
    for node in self.all_node_info.values():
2747
      path = _SupportsOob(self.cfg, node)
2748
      if path and path not in oob_paths:
2749
        oob_paths.append(path)
2750

    
2751
    if oob_paths:
2752
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2753

    
2754
    for instance in self.my_inst_names:
2755
      inst_config = self.my_inst_info[instance]
2756

    
2757
      for nname in inst_config.all_nodes:
2758
        if nname not in node_image:
2759
          gnode = self.NodeImage(name=nname)
2760
          gnode.ghost = (nname not in self.all_node_info)
2761
          node_image[nname] = gnode
2762

    
2763
      inst_config.MapLVsByNode(node_vol_should)
2764

    
2765
      pnode = inst_config.primary_node
2766
      node_image[pnode].pinst.append(instance)
2767

    
2768
      for snode in inst_config.secondary_nodes:
2769
        nimg = node_image[snode]
2770
        nimg.sinst.append(instance)
2771
        if pnode not in nimg.sbp:
2772
          nimg.sbp[pnode] = []
2773
        nimg.sbp[pnode].append(instance)
2774

    
2775
    # At this point, we have the in-memory data structures complete,
2776
    # except for the runtime information, which we'll gather next
2777

    
2778
    # Due to the way our RPC system works, exact response times cannot be
2779
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2780
    # time before and after executing the request, we can at least have a time
2781
    # window.
2782
    nvinfo_starttime = time.time()
2783
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2784
                                           node_verify_param,
2785
                                           self.cfg.GetClusterName())
2786
    nvinfo_endtime = time.time()
2787

    
2788
    if self.extra_lv_nodes and vg_name is not None:
2789
      extra_lv_nvinfo = \
2790
          self.rpc.call_node_verify(self.extra_lv_nodes,
2791
                                    {constants.NV_LVLIST: vg_name},
2792
                                    self.cfg.GetClusterName())
2793
    else:
2794
      extra_lv_nvinfo = {}
2795

    
2796
    all_drbd_map = self.cfg.ComputeDRBDMap()
2797

    
2798
    feedback_fn("* Gathering disk information (%s nodes)" %
2799
                len(self.my_node_names))
2800
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2801
                                     self.my_inst_info)
2802

    
2803
    feedback_fn("* Verifying configuration file consistency")
2804

    
2805
    # If not all nodes are being checked, we need to make sure the master node
2806
    # and a non-checked vm_capable node are in the list.
2807
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2808
    if absent_nodes:
2809
      vf_nvinfo = all_nvinfo.copy()
2810
      vf_node_info = list(self.my_node_info.values())
2811
      additional_nodes = []
2812
      if master_node not in self.my_node_info:
2813
        additional_nodes.append(master_node)
2814
        vf_node_info.append(self.all_node_info[master_node])
2815
      # Add the first vm_capable node we find which is not included
2816
      for node in absent_nodes:
2817
        nodeinfo = self.all_node_info[node]
2818
        if nodeinfo.vm_capable and not nodeinfo.offline:
2819
          additional_nodes.append(node)
2820
          vf_node_info.append(self.all_node_info[node])
2821
          break
2822
      key = constants.NV_FILELIST
2823
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2824
                                                 {key: node_verify_param[key]},
2825
                                                 self.cfg.GetClusterName()))
2826
    else:
2827
      vf_nvinfo = all_nvinfo
2828
      vf_node_info = self.my_node_info.values()
2829

    
2830
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2831

    
2832
    feedback_fn("* Verifying node status")
2833

    
2834
    refos_img = None
2835

    
2836
    for node_i in node_data_list:
2837
      node = node_i.name
2838
      nimg = node_image[node]
2839

    
2840
      if node_i.offline:
2841
        if verbose:
2842
          feedback_fn("* Skipping offline node %s" % (node,))
2843
        n_offline += 1
2844
        continue
2845

    
2846
      if node == master_node:
2847
        ntype = "master"
2848
      elif node_i.master_candidate:
2849
        ntype = "master candidate"
2850
      elif node_i.drained:
2851
        ntype = "drained"
2852
        n_drained += 1
2853
      else:
2854
        ntype = "regular"
2855
      if verbose:
2856
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2857

    
2858
      msg = all_nvinfo[node].fail_msg
2859
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2860
      if msg:
2861
        nimg.rpc_fail = True
2862
        continue
2863

    
2864
      nresult = all_nvinfo[node].payload
2865

    
2866
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2867
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2868
      self._VerifyNodeNetwork(node_i, nresult)
2869
      self._VerifyOob(node_i, nresult)
2870

    
2871
      if nimg.vm_capable:
2872
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2873
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2874
                             all_drbd_map)
2875

    
2876
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2877
        self._UpdateNodeInstances(node_i, nresult, nimg)
2878
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2879
        self._UpdateNodeOS(node_i, nresult, nimg)
2880

    
2881
        if not nimg.os_fail:
2882
          if refos_img is None:
2883
            refos_img = nimg
2884
          self._VerifyNodeOS(node_i, nimg, refos_img)
2885
        self._VerifyNodeBridges(node_i, nresult, bridges)
2886

    
2887
        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
2890
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2891

    
2892
        for inst in non_primary_inst:
2893
          test = inst in self.all_inst_info
2894
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2895
                   "instance should not run on node %s", node_i.name)
2896
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2897
                   "node is running unknown instance %s", inst)
2898

    
2899
    for node, result in extra_lv_nvinfo.items():
2900
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2901
                              node_image[node], vg_name)
2902

    
2903
    feedback_fn("* Verifying instance status")
2904
    for instance in self.my_inst_names:
2905
      if verbose:
2906
        feedback_fn("* Verifying instance %s" % instance)
2907
      inst_config = self.my_inst_info[instance]
2908
      self._VerifyInstance(instance, inst_config, node_image,
2909
                           instdisk[instance])
2910
      inst_nodes_offline = []
2911

    
2912
      pnode = inst_config.primary_node
2913
      pnode_img = node_image[pnode]
2914
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2915
               self.ENODERPC, pnode, "instance %s, connection to"
2916
               " primary node failed", instance)
2917

    
2918
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2919
               self.EINSTANCEBADNODE, instance,
2920
               "instance is marked as running and lives on offline node %s",
2921
               inst_config.primary_node)
2922

    
2923
      # If the instance is non-redundant we cannot survive losing its primary
2924
      # node, so we are not N+1 compliant. On the other hand we have no disk
2925
      # templates with more than one secondary so that situation is not well
2926
      # supported either.
2927
      # FIXME: does not support file-backed instances
2928
      if not inst_config.secondary_nodes:
2929
        i_non_redundant.append(instance)
2930

    
2931
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2932
               instance, "instance has multiple secondary nodes: %s",
2933
               utils.CommaJoin(inst_config.secondary_nodes),
2934
               code=self.ETYPE_WARNING)
2935

    
2936
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2937
        pnode = inst_config.primary_node
2938
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2939
        instance_groups = {}
2940

    
2941
        for node in instance_nodes:
2942
          instance_groups.setdefault(self.all_node_info[node].group,
2943
                                     []).append(node)
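        # pretty_list below renders these groups for the warning message,
        # e.g. ["node1, node2 (group default)", "node3 (group other)"]
        # (names illustrative), with the primary node's group listed first.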
2944

    
2945
        pretty_list = [
2946
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2947
          # Sort so that we always list the primary node first.
2948
          for group, nodes in sorted(instance_groups.items(),
2949
                                     key=lambda (_, nodes): pnode in nodes,
2950
                                     reverse=True)]
2951

    
2952
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2953
                      instance, "instance has primary and secondary nodes in"
2954
                      " different groups: %s", utils.CommaJoin(pretty_list),
2955
                      code=self.ETYPE_WARNING)
2956

    
2957
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2958
        i_non_a_balanced.append(instance)
2959

    
2960
      for snode in inst_config.secondary_nodes:
2961
        s_img = node_image[snode]
2962
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2963
                 "instance %s, connection to secondary node failed", instance)
2964

    
2965
        if s_img.offline:
2966
          inst_nodes_offline.append(snode)
2967

    
2968
      # warn that the instance lives on offline nodes
2969
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2970
               "instance has offline secondary node(s) %s",
2971
               utils.CommaJoin(inst_nodes_offline))
2972
      # ... or ghost/non-vm_capable nodes
2973
      for node in inst_config.all_nodes:
2974
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2975
                 "instance lives on ghost node %s", node)
2976
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2977
                 instance, "instance lives on non-vm_capable node %s", node)
2978

    
2979
    feedback_fn("* Verifying orphan volumes")
2980
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2981

    
2982
    # We will get spurious "unknown volume" warnings if any node of this group
2983
    # is secondary for an instance whose primary is in another group. To avoid
2984
    # them, we find these instances and add their volumes to node_vol_should.
2985
    for inst in self.all_inst_info.values():
2986
      for secondary in inst.secondary_nodes:
2987
        if (secondary in self.my_node_info
2988
            and inst.name not in self.my_inst_info):
2989
          inst.MapLVsByNode(node_vol_should)
2990
          break
2991

    
2992
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2993

    
2994
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
3014

    
3015
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3016
    """Analyze the post-hooks' result
3017

3018
    This method analyses the hook result, handles it, and sends some
3019
    nicely-formatted feedback back to the user.
3020

3021
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3022
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3023
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
3026
    @return: the new Exec result, based on the previous result
3027
        and hook results
3028

3029
    """
3030
    # We only really run POST phase hooks, only for non-empty groups,
3031
    # and are only interested in their results
3032
    if not self.my_node_names:
3033
      # empty node group
3034
      pass
3035
    elif phase == constants.HOOKS_PHASE_POST:
3036
      # Used to change hooks' output to proper indentation
3037
      feedback_fn("* Hooks Results")
3038
      assert hooks_results, "invalid result from hooks"
3039

    
3040
      for node_name in hooks_results:
3041
        res = hooks_results[node_name]
3042
        msg = res.fail_msg
3043
        test = msg and not res.offline
3044
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
3045
                      "Communication failure in hooks execution: %s", msg)
3046
        if res.offline or msg:
3047
          # No need to investigate payload if node is offline or gave
3048
          # an error.
3049
          continue
3050
        for script, hkr, output in res.payload:
3051
          test = hkr == constants.HKR_FAIL
3052
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
3053
                        "Script %s failed, output:", script)
3054
          if test:
3055
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3056
            feedback_fn("%s" % output)
3057
            lu_result = False
3058

    
3059
    return lu_result
3060

    
3061

    
3062
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
3080

    
3081

    
3082
class LUGroupVerifyDisks(NoHooksLU):
3083
  """Verifies the status of all disks in a node group.
3084

3085
  """
3086
  REQ_BGL = False
3087

    
3088
  def ExpandNames(self):
3089
    # Raises errors.OpPrereqError on its own if group can't be found
3090
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3091

    
3092
    self.share_locks = _ShareAll()
3093
    self.needed_locks = {
3094
      locking.LEVEL_INSTANCE: [],
3095
      locking.LEVEL_NODEGROUP: [],
3096
      locking.LEVEL_NODE: [],
3097
      }
3098

    
3099
  def DeclareLocks(self, level):
3100
    if level == locking.LEVEL_INSTANCE:
3101
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3102

    
3103
      # Lock instances optimistically, needs verification once node and group
3104
      # locks have been acquired
3105
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3106
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3107

    
3108
    elif level == locking.LEVEL_NODEGROUP:
3109
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3110

    
3111
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3112
        set([self.group_uuid] +
3113
            # Lock all groups used by instances optimistically; this requires
3114
            # going via the node before it's locked, requiring verification
3115
            # later on
3116
            [group_uuid
3117
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3118
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3119

    
3120
    elif level == locking.LEVEL_NODE:
3121
      # This will only lock the nodes in the group to be verified which contain
3122
      # actual instances
3123
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3124
      self._LockInstancesNodes()
3125

    
3126
      # Lock all nodes in group to be verified
3127
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3128
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3129
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3130

    
3131
  def CheckPrereq(self):
3132
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3133
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3134
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3135

    
3136
    assert self.group_uuid in owned_groups
3137

    
3138
    # Check if locked instances are still correct
3139
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3140

    
3141
    # Get instance information
3142
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3143

    
3144
    # Check if node groups for locked instances are still correct
3145
    _CheckInstancesNodeGroups(self.cfg, self.instances,
3146
                              owned_groups, owned_nodes, self.group_uuid)
3147

    
3148
  def Exec(self, feedback_fn):
3149
    """Verify integrity of cluster disks.
3150

3151
    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
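    # Shape of the returned tuple (all values illustrative only):
    #   ({"node1": "error while contacting node"},   # per-node errors
    #    ["inst1"],                                  # need activate-disks
    #    {"inst2": [["node2", "xenvg/lv-name"]]})    # missing (node, volume)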
    res_nodes = {}
3158
    res_instances = set()
3159
    res_missing = {}
3160

    
3161
    nv_dict = _MapInstanceDisksToNodes([inst
3162
                                        for inst in self.instances.values()
3163
                                        if inst.admin_up])
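    # nv_dict maps (node name, LV name) pairs to the owning instance for all
    # disks of admin-up instances; entries still present after the per-node
    # check below are reported as missing volumes.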
3164

    
3165
    if nv_dict:
3166
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3167
                             set(self.cfg.GetVmCapableNodeList()))
3168

    
3169
      node_lvs = self.rpc.call_lv_list(nodes, [])
3170

    
3171
      for (node, node_res) in node_lvs.items():
3172
        if node_res.offline:
3173
          continue
3174

    
3175
        msg = node_res.fail_msg
3176
        if msg:
3177
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3178
          res_nodes[node] = msg
3179
          continue
3180

    
3181
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3182
          inst = nv_dict.pop((node, lv_name), None)
3183
          if not (lv_online or inst is None):
3184
            res_instances.add(inst)
3185

    
3186
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3187
      # better
3188
      for key, inst in nv_dict.iteritems():
3189
        res_missing.setdefault(inst, []).append(list(key))
3190

    
3191
    return (res_nodes, list(res_instances), res_missing)
3192

    
3193

    
3194
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.
3196

3197
  """
3198
  REQ_BGL = False
3199

    
3200
  def ExpandNames(self):
3201
    if self.op.instances:
3202
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3203
      self.needed_locks = {
3204
        locking.LEVEL_NODE: [],
3205
        locking.LEVEL_INSTANCE: self.wanted_names,
3206
        }
3207
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3208
    else:
3209
      self.wanted_names = None
3210
      self.needed_locks = {
3211
        locking.LEVEL_NODE: locking.ALL_SET,
3212
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3213
        }
3214
    self.share_locks = {
3215
      locking.LEVEL_NODE: 1,
3216
      locking.LEVEL_INSTANCE: 0,
3217
      }
3218

    
3219
  def DeclareLocks(self, level):
3220
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3221
      self._LockInstancesNodes(primary_only=True)
3222

    
3223
  def CheckPrereq(self):
3224
    """Check prerequisites.
3225

3226
    This only checks the optional instance list against the existing names.
3227

3228
    """
3229
    if self.wanted_names is None:
3230
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3231

    
3232
    self.wanted_instances = \
3233
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3234

    
3235
  def _EnsureChildSizes(self, disk):
3236
    """Ensure children of the disk have the needed disk size.
3237

3238
    This is valid mainly for DRBD8 and fixes an issue where the
3239
    children have smaller disk size.
3240

3241
    @param disk: an L{ganeti.objects.Disk} object
3242

3243
    """
3244
    if disk.dev_type == constants.LD_DRBD8:
3245
      assert disk.children, "Empty children for DRBD8?"
3246
      fchild = disk.children[0]
3247
      mismatch = fchild.size < disk.size
3248
      if mismatch:
3249
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3250
                     fchild.size, disk.size)
3251
        fchild.size = disk.size
3252

    
3253
      # and we recurse on this child only, not on the metadev
3254
      return self._EnsureChildSizes(fchild) or mismatch
3255
    else:
3256
      return False
3257

    
3258
  def Exec(self, feedback_fn):
3259
    """Verify the size of cluster disks.
3260

3261
    """
3262
    # TODO: check child disks too
3263
    # TODO: check differences in size between primary/secondary nodes
3264
    per_node_disks = {}
3265
    for instance in self.wanted_instances:
3266
      pnode = instance.primary_node
3267
      if pnode not in per_node_disks:
3268
        per_node_disks[pnode] = []
3269
      for idx, disk in enumerate(instance.disks):
3270
        per_node_disks[pnode].append((instance, idx, disk))
3271

    
3272
    changed = []
3273
    for node, dskl in per_node_disks.items():
3274
      newl = [v[2].Copy() for v in dskl]
3275
      for dsk in newl:
3276
        self.cfg.SetDiskID(dsk, node)
3277
      result = self.rpc.call_blockdev_getsize(node, newl)
3278
      if result.fail_msg:
3279
        self.LogWarning("Failure in blockdev_getsize call to node"
3280
                        " %s, ignoring", node)
3281
        continue
3282
      if len(result.payload) != len(dskl):
3283
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
3285
        self.LogWarning("Invalid result from node %s, ignoring node results",
3286
                        node)
3287
        continue
3288
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3289
        if size is None:
3290
          self.LogWarning("Disk %d of instance %s did not return size"
3291
                          " information, ignoring", idx, instance.name)
3292
          continue
3293
        if not isinstance(size, (int, long)):
3294
          self.LogWarning("Disk %d of instance %s did not return valid"
3295
                          " size information, ignoring", idx, instance.name)
3296
          continue
3297
        size = size >> 20
3298
        if size != disk.size:
3299
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3300
                       " correcting: recorded %d, actual %d", idx,
3301
                       instance.name, disk.size, size)
3302
          disk.size = size
3303
          self.cfg.Update(instance, feedback_fn)
3304
          changed.append((instance.name, idx, size))
3305
        if self._EnsureChildSizes(disk):
3306
          self.cfg.Update(instance, feedback_fn)
3307
          changed.append((instance.name, idx, disk.size))
3308
    return changed
3309

    
3310

    
3311
class LUClusterRename(LogicalUnit):
3312
  """Rename the cluster.
3313

3314
  """
3315
  HPATH = "cluster-rename"
3316
  HTYPE = constants.HTYPE_CLUSTER
3317

    
3318
  def BuildHooksEnv(self):
3319
    """Build hooks env.
3320

3321
    """
3322
    return {
3323
      "OP_TARGET": self.cfg.GetClusterName(),
3324
      "NEW_NAME": self.op.name,
3325
      }
3326

    
3327
  def BuildHooksNodes(self):
3328
    """Build hooks nodes.
3329

3330
    """
3331
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3332

    
3333
  def CheckPrereq(self):
3334
    """Verify that the passed name is a valid one.
3335

3336
    """
3337
    hostname = netutils.GetHostname(name=self.op.name,
3338
                                    family=self.cfg.GetPrimaryIPFamily())
3339

    
3340
    new_name = hostname.name
3341
    self.ip = new_ip = hostname.ip
3342
    old_name = self.cfg.GetClusterName()
3343
    old_ip = self.cfg.GetMasterIP()
3344
    if new_name == old_name and new_ip == old_ip:
3345
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3346
                                 " cluster has changed",
3347
                                 errors.ECODE_INVAL)
3348
    if new_ip != old_ip:
3349
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3350
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3351
                                   " reachable on the network" %
3352
                                   new_ip, errors.ECODE_NOTUNIQUE)
3353

    
3354
    self.op.name = new_name
3355

    
3356
  def Exec(self, feedback_fn):
3357
    """Rename the cluster.
3358

3359
    """
3360
    clustername = self.op.name
3361
    ip = self.ip
3362

    
3363
    # shutdown the master IP
3364
    master = self.cfg.GetMasterNode()
3365
    result = self.rpc.call_node_deactivate_master_ip(master)
3366
    result.Raise("Could not disable the master role")
3367

    
3368
    try:
3369
      cluster = self.cfg.GetClusterInfo()
3370
      cluster.cluster_name = clustername
3371
      cluster.master_ip = ip
3372
      self.cfg.Update(cluster, feedback_fn)
3373

    
3374
      # update the known hosts file
3375
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3376
      node_list = self.cfg.GetOnlineNodeList()
3377
      try:
3378
        node_list.remove(master)
3379
      except ValueError:
3380
        pass
3381
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3382
    finally:
3383
      result = self.rpc.call_node_activate_master_ip(master)
3384
      msg = result.fail_msg
3385
      if msg:
3386
        self.LogWarning("Could not re-enable the master role on"
3387
                        " the master, please restart manually: %s", msg)
3388

    
3389
    return clustername
3390

    
3391

    
3392
class LUClusterSetParams(LogicalUnit):
3393
  """Change the parameters of the cluster.
3394

3395
  """
3396
  HPATH = "cluster-modify"
3397
  HTYPE = constants.HTYPE_CLUSTER
3398
  REQ_BGL = False
3399

    
3400
  def CheckArguments(self):
3401
    """Check parameters
3402

3403
    """
3404
    if self.op.uid_pool:
3405
      uidpool.CheckUidPool(self.op.uid_pool)
3406

    
3407
    if self.op.add_uids:
3408
      uidpool.CheckUidPool(self.op.add_uids)
3409

    
3410
    if self.op.remove_uids:
3411
      uidpool.CheckUidPool(self.op.remove_uids)
3412

    
3413
  def ExpandNames(self):
3414
    # FIXME: in the future maybe other cluster params won't require checking on
3415
    # all nodes to be modified.
3416
    self.needed_locks = {
3417
      locking.LEVEL_NODE: locking.ALL_SET,
3418
    }
3419
    self.share_locks[locking.LEVEL_NODE] = 1
3420

    
3421
  def BuildHooksEnv(self):
3422
    """Build hooks env.
3423

3424
    """
3425
    return {
3426
      "OP_TARGET": self.cfg.GetClusterName(),
3427
      "NEW_VG_NAME": self.op.vg_name,
3428
      }
3429

    
3430
  def BuildHooksNodes(self):
3431
    """Build hooks nodes.
3432

3433
    """
3434
    mn = self.cfg.GetMasterNode()
3435
    return ([mn], [mn])
3436

    
3437
  def CheckPrereq(self):
3438
    """Check prerequisites.
3439

3440
    This checks whether the given params don't conflict and
3441
    if the given volume group is valid.
3442

3443
    """
3444
    if self.op.vg_name is not None and not self.op.vg_name:
3445
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3446
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3447
                                   " instances exist", errors.ECODE_INVAL)
3448

    
3449
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3450
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3451
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3452
                                   " drbd-based instances exist",
3453
                                   errors.ECODE_INVAL)
3454

    
3455
    node_list = self.owned_locks(locking.LEVEL_NODE)
3456

    
3457
    # if vg_name not None, checks given volume group on all nodes
3458
    if self.op.vg_name:
3459
      vglist = self.rpc.call_vg_list(node_list)
3460
      for node in node_list:
3461
        msg = vglist[node].fail_msg
3462
        if msg:
3463
          # ignoring down node
3464
          self.LogWarning("Error while gathering data on node %s"
3465
                          " (ignoring node): %s", node, msg)
3466
          continue
3467
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3468
                                              self.op.vg_name,
3469
                                              constants.MIN_VG_SIZE)
3470
        if vgstatus:
3471
          raise errors.OpPrereqError("Error on node '%s': %s" %
3472
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3473

    
3474
    if self.op.drbd_helper:
3475
      # checks given drbd helper on all nodes
3476
      helpers = self.rpc.call_drbd_helper(node_list)
3477
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3478
        if ninfo.offline:
3479
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3480
          continue
3481
        msg = helpers[node].fail_msg
3482
        if msg:
3483
          raise errors.OpPrereqError("Error checking drbd helper on node"
3484
                                     " '%s': %s" % (node, msg),
3485
                                     errors.ECODE_ENVIRON)
3486
        node_helper = helpers[node].payload
3487
        if node_helper != self.op.drbd_helper:
3488
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3489
                                     (node, node_helper), errors.ECODE_ENVIRON)
3490

    
3491
    self.cluster = cluster = self.cfg.GetClusterInfo()
3492
    # validate params changes
3493
    if self.op.beparams:
3494
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3495
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3496

    
3497
    if self.op.ndparams:
3498
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3499
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3500

    
3501
      # TODO: we need a more general way to handle resetting
3502
      # cluster-level parameters to default values
3503
      if self.new_ndparams["oob_program"] == "":
3504
        self.new_ndparams["oob_program"] = \
3505
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3506

    
3507
    if self.op.nicparams:
3508
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3509
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3510
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3511
      nic_errors = []
3512

    
3513
      # check all instances for consistency
3514
      for instance in self.cfg.GetAllInstancesInfo().values():
3515
        for nic_idx, nic in enumerate(instance.nics):
3516
          params_copy = copy.deepcopy(nic.nicparams)
3517
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3518

    
3519
          # check parameter syntax
3520
          try:
3521
            objects.NIC.CheckParameterSyntax(params_filled)
3522
          except errors.ConfigurationError, err:
3523
            nic_errors.append("Instance %s, nic/%d: %s" %
3524
                              (instance.name, nic_idx, err))
3525

    
3526
          # if we're moving instances to routed, check that they have an ip
3527
          target_mode = params_filled[constants.NIC_MODE]
3528
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3529
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3530
                              " address" % (instance.name, nic_idx))
3531
      if nic_errors:
3532
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3533
                                   "\n".join(nic_errors))
3534

    
3535
    # hypervisor list/parameters
3536
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3537
    if self.op.hvparams:
3538
      for hv_name, hv_dict in self.op.hvparams.items():
3539
        if hv_name not in self.new_hvparams:
3540
          self.new_hvparams[hv_name] = hv_dict
3541
        else:
3542
          self.new_hvparams[hv_name].update(hv_dict)
3543

    
3544
    # os hypervisor parameters
3545
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3546
    if self.op.os_hvp:
3547
      for os_name, hvs in self.op.os_hvp.items():
3548
        if os_name not in self.new_os_hvp:
3549
          self.new_os_hvp[os_name] = hvs
3550
        else:
3551
          for hv_name, hv_dict in hvs.items():
3552
            if hv_name not in self.new_os_hvp[os_name]:
3553
              self.new_os_hvp[os_name][hv_name] = hv_dict
3554
            else:
3555
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3556

    
3557
    # os parameters
3558
    self.new_osp = objects.FillDict(cluster.osparams, {})
3559
    if self.op.osparams:
3560
      for os_name, osp in self.op.osparams.items():
3561
        if os_name not in self.new_osp:
3562
          self.new_osp[os_name] = {}
3563

    
3564
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3565
                                                  use_none=True)
3566

    
3567
        if not self.new_osp[os_name]:
3568
          # we removed all parameters
3569
          del self.new_osp[os_name]
3570
        else:
3571
          # check the parameter validity (remote check)
3572
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3573
                         os_name, self.new_osp[os_name])
3574

    
3575
    # changes to the hypervisor list
3576
    if self.op.enabled_hypervisors is not None:
3577
      self.hv_list = self.op.enabled_hypervisors
3578
      for hv in self.hv_list:
3579
        # if the hypervisor doesn't already exist in the cluster
3580
        # hvparams, we initialize it to empty, and then (in both
3581
        # cases) we make sure to fill the defaults, as we might not
3582
        # have a complete defaults list if the hypervisor wasn't
3583
        # enabled before
3584
        if hv not in new_hvp:
3585
          new_hvp[hv] = {}
3586
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3587
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3588
    else:
3589
      self.hv_list = cluster.enabled_hypervisors
3590

    
3591
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3592
      # either the enabled list has changed, or the parameters have, validate
3593
      for hv_name, hv_params in self.new_hvparams.items():
3594
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3595
            (self.op.enabled_hypervisors and
3596
             hv_name in self.op.enabled_hypervisors)):
3597
          # either this is a new hypervisor, or its parameters have changed
3598
          hv_class = hypervisor.GetHypervisor(hv_name)
3599
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3600
          hv_class.CheckParameterSyntax(hv_params)
3601
          _CheckHVParams(self, node_list, hv_name, hv_params)
3602

    
3603
    if self.op.os_hvp:
3604
      # no need to check any newly-enabled hypervisors, since the
3605
      # defaults have already been checked in the above code-block
3606
      for os_name, os_hvp in self.new_os_hvp.items():
3607
        for hv_name, hv_params in os_hvp.items():
3608
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3609
          # we need to fill in the new os_hvp on top of the actual hv_p
3610
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3611
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3612
          hv_class = hypervisor.GetHypervisor(hv_name)
3613
          hv_class.CheckParameterSyntax(new_osp)
3614
          _CheckHVParams(self, node_list, hv_name, new_osp)
3615

    
3616
    if self.op.default_iallocator:
3617
      alloc_script = utils.FindFile(self.op.default_iallocator,
3618
                                    constants.IALLOCATOR_SEARCH_PATH,
3619
                                    os.path.isfile)
3620
      if alloc_script is None:
3621
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3622
                                   " specified" % self.op.default_iallocator,
3623
                                   errors.ECODE_INVAL)
3624

    
3625
  def Exec(self, feedback_fn):
3626
    """Change the parameters of the cluster.
3627

3628
    """
3629
    if self.op.vg_name is not None:
3630
      new_volume = self.op.vg_name
3631
      if not new_volume:
3632
        new_volume = None
3633
      if new_volume != self.cfg.GetVGName():
3634
        self.cfg.SetVGName(new_volume)
3635
      else:
3636
        feedback_fn("Cluster LVM configuration already in desired"
3637
                    " state, not changing")
3638
    if self.op.drbd_helper is not None:
3639
      new_helper = self.op.drbd_helper
3640
      if not new_helper:
3641
        new_helper = None
3642
      if new_helper != self.cfg.GetDRBDHelper():
3643
        self.cfg.SetDRBDHelper(new_helper)
3644
      else:
3645
        feedback_fn("Cluster DRBD helper already in desired state,"
3646
                    " not changing")
3647
    if self.op.hvparams:
3648
      self.cluster.hvparams = self.new_hvparams
3649
    if self.op.os_hvp:
3650
      self.cluster.os_hvp = self.new_os_hvp
3651
    if self.op.enabled_hypervisors is not None:
3652
      self.cluster.hvparams = self.new_hvparams
3653
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3654
    if self.op.beparams:
3655
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3656
    if self.op.nicparams:
3657
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3658
    if self.op.osparams:
3659
      self.cluster.osparams = self.new_osp
3660
    if self.op.ndparams:
3661
      self.cluster.ndparams = self.new_ndparams
3662

    
3663
    if self.op.candidate_pool_size is not None:
3664
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3665
      # we need to update the pool size here, otherwise the save will fail
3666
      _AdjustCandidatePool(self, [])
3667

    
3668
    if self.op.maintain_node_health is not None:
3669
      self.cluster.maintain_node_health = self.op.maintain_node_health
3670

    
3671
    if self.op.prealloc_wipe_disks is not None:
3672
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3673

    
3674
    if self.op.add_uids is not None:
3675
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3676

    
3677
    if self.op.remove_uids is not None:
3678
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3679

    
3680
    if self.op.uid_pool is not None:
3681
      self.cluster.uid_pool = self.op.uid_pool
3682

    
3683
    if self.op.default_iallocator is not None:
3684
      self.cluster.default_iallocator = self.op.default_iallocator
3685

    
3686
    if self.op.reserved_lvs is not None:
3687
      self.cluster.reserved_lvs = self.op.reserved_lvs
3688

    
3689
    def helper_os(aname, mods, desc):
3690
      desc += " OS list"
3691
      lst = getattr(self.cluster, aname)
3692
      for key, val in mods:
3693
        if key == constants.DDM_ADD:
3694
          if val in lst:
3695
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3696
          else:
3697
            lst.append(val)
3698
        elif key == constants.DDM_REMOVE:
3699
          if val in lst:
3700
            lst.remove(val)
3701
          else:
3702
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3703
        else:
3704
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
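    # "mods" is a list of (action, OS name) pairs, e.g.
    # [(constants.DDM_ADD, "debian-image")] (the OS name is illustrative).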
3705

    
3706
    if self.op.hidden_os:
3707
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3708

    
3709
    if self.op.blacklisted_os:
3710
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3711

    
3712
    if self.op.master_netdev:
3713
      master = self.cfg.GetMasterNode()
3714
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3715
                  self.cluster.master_netdev)
3716
      result = self.rpc.call_node_deactivate_master_ip(master)
3717
      result.Raise("Could not disable the master ip")
3718
      feedback_fn("Changing master_netdev from %s to %s" %
3719
                  (self.cluster.master_netdev, self.op.master_netdev))
3720
      self.cluster.master_netdev = self.op.master_netdev
3721

    
3722
    self.cfg.Update(self.cluster, feedback_fn)
3723

    
3724
    if self.op.master_netdev:
3725
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3726
                  self.op.master_netdev)
3727
      result = self.rpc.call_node_activate_master_ip(master)
3728
      if result.fail_msg:
3729
        self.LogWarning("Could not re-enable the master ip on"
3730
                        " the master, please restart manually: %s",
3731
                        result.fail_msg)
3732

    
3733

    
3734
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)
3746

    
3747

    
3748
def _ComputeAncillaryFiles(cluster, redist):
3749
  """Compute files external to Ganeti which need to be consistent.
3750

3751
  @type redist: boolean
3752
  @param redist: Whether to include files which need to be redistributed
3753

3754
  """
3755
  # Compute files for all nodes
3756
  files_all = set([
3757
    constants.SSH_KNOWN_HOSTS_FILE,
3758
    constants.CONFD_HMAC_KEY,
3759
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3760
    constants.RAPI_USERS_FILE,
3761
    ])
3762

    
3763
  if not redist:
3764
    files_all.update(constants.ALL_CERT_FILES)
3765
    files_all.update(ssconf.SimpleStore().GetFileList())
3766
  else:
3767
    # we need to ship at least the RAPI certificate
3768
    files_all.add(constants.RAPI_CERT_FILE)
3769

    
3770
  if cluster.modify_etc_hosts:
3771
    files_all.add(constants.ETC_HOSTS)
3772

    
3773
  # Files which are optional, these must:
3774
  # - be present in one other category as well
3775
  # - either exist or not exist on all nodes of that category (mc, vm all)
3776
  files_opt = set([
3777
    constants.RAPI_USERS_FILE,
3778
    ])
3779

    
3780
  # Files which should only be on master candidates
3781
  files_mc = set()
3782
  if not redist:
3783
    files_mc.add(constants.CLUSTER_CONF_FILE)
3784

    
3785
  # Files which should only be on VM-capable nodes
3786
  files_vm = set(filename
3787
    for hv_name in cluster.enabled_hypervisors
3788
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3789

    
3790
  files_opt |= set(filename
3791
    for hv_name in cluster.enabled_hypervisors
3792
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3793

    
3794
  # Filenames in each category must be unique
3795
  all_files_set = files_all | files_mc | files_vm
3796
  assert (len(all_files_set) ==
3797
          sum(map(len, [files_all, files_mc, files_vm]))), \
3798
         "Found file listed in more than one file list"
3799

    
3800
  # Optional files must be present in one other category
3801
  assert all_files_set.issuperset(files_opt), \
3802
         "Optional file not in a different required list"
3803

    
3804
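  # Summary of the returned sets (derived from the constants used above):
  #   files_all - distributed to every node
  #   files_opt - allowed to be missing on some nodes (e.g. RAPI_USERS_FILE)
  #   files_mc  - master candidates only (cluster config when not redist)
  #   files_vm  - hypervisor ancillary files for VM-capable nodes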
  return (files_all, files_opt, files_mc, files_vm)
3805

    
3806

    
3807
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3808
  """Distribute additional files which are part of the cluster configuration.
3809

3810
  ConfigWriter takes care of distributing the config and ssconf files, but
3811
  there are more files which should be distributed to all nodes. This function
3812
  makes sure those are copied.
3813

3814
  @param lu: calling logical unit
3815
  @param additional_nodes: list of nodes not in the config to distribute to
3816
  @type additional_vm: boolean
3817
  @param additional_vm: whether the additional nodes are vm-capable or not
3818

3819
  """
3820
  # Gather target nodes
3821
  cluster = lu.cfg.GetClusterInfo()
3822
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3823

    
3824
  online_nodes = lu.cfg.GetOnlineNodeList()
3825
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3826

    
3827
  if additional_nodes is not None:
3828
    online_nodes.extend(additional_nodes)
3829
    if additional_vm:
3830
      vm_nodes.extend(additional_nodes)
3831

    
3832
  # Never distribute to master node
3833
  for nodelist in [online_nodes, vm_nodes]:
3834
    if master_info.name in nodelist:
3835
      nodelist.remove(master_info.name)
3836

    
3837
  # Gather file lists
3838
  (files_all, _, files_mc, files_vm) = \
3839
    _ComputeAncillaryFiles(cluster, True)
3840

    
3841
  # Never re-distribute configuration file from here
3842
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3843
              constants.CLUSTER_CONF_FILE in files_vm)
3844
  assert not files_mc, "Master candidates not handled in this function"
3845

    
3846
  filemap = [
3847
    (online_nodes, files_all),
3848
    (vm_nodes, files_vm),
3849
    ]
3850

    
3851
  # Upload the files
3852
  for (node_list, files) in filemap:
3853
    for fname in files:
3854
      _UploadHelper(lu, node_list, fname)
3855

    
3856

    
3857
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)
3877

    
3878

    
3879
class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_activate_master_ip(master)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not deactivate the master IP")
3903

    
3904

    
3905
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disks to sync.
3907

3908
  """
3909
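  # This helper returns True when no disk ended up degraded (see the final
  # "return not cumul_degraded" below), False otherwise.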
  if not instance.disks or disks is not None and not disks:
3910
    return True
3911

    
3912
  disks = _ExpandCheckDisks(instance, disks)
3913

    
3914
  if not oneshot:
3915
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3916

    
3917
  node = instance.primary_node
3918

    
3919
  for dev in disks:
3920
    lu.cfg.SetDiskID(dev, node)
3921

    
3922
  # TODO: Convert to utils.Retry
3923

    
3924
  retries = 0
3925
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3926
  while True:
3927
    max_time = 0
3928
    done = True
3929
    cumul_degraded = False
3930
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3931
    msg = rstats.fail_msg
3932
    if msg:
3933
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3934
      retries += 1
3935
      if retries >= 10:
3936
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3937
                                 " aborting." % node)
3938
      time.sleep(6)
3939
      continue
3940
    rstats = rstats.payload
3941
    retries = 0
3942
    for i, mstat in enumerate(rstats):
3943
      if mstat is None:
3944
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
3946
        continue
3947

    
3948
      cumul_degraded = (cumul_degraded or
3949
                        (mstat.is_degraded and mstat.sync_percent is None))
3950
      if mstat.sync_percent is not None:
3951
        done = False
3952
        if mstat.estimated_time is not None:
3953
          rem_time = ("%s remaining (estimated)" %
3954
                      utils.FormatSeconds(mstat.estimated_time))
3955
          max_time = mstat.estimated_time
3956
        else:
3957
          rem_time = "no time estimate"
3958
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3959
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3960

    
3961
    # if we're done but degraded, let's do a few small retries, to
3962
    # make sure we see a stable and not transient situation; therefore
3963
    # we force restart of the loop
3964
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3965
      logging.info("Degraded disks found, %d retries left", degr_retries)
3966
      degr_retries -= 1
3967
      time.sleep(1)
3968
      continue
3969

    
3970
    if done or oneshot:
3971
      break
3972

    
3973
    time.sleep(min(60, max_time))
3974

    
3975
  if done:
3976
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3977
  return not cumul_degraded
3978

    
3979

    
3980
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3981
  """Check that mirrors are not degraded.
3982

3983
  The ldisk parameter, if True, will change the test from the
3984
  is_degraded attribute (which represents overall non-ok status for
3985
  the device(s)) to the ldisk (representing the local storage status).
3986

3987
  """
3988
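  # The result is True when the device (and, recursively, its children)
  # reports a healthy status on the given node, False otherwise.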
  lu.cfg.SetDiskID(dev, node)
3989

    
3990
  result = True
3991

    
3992
  if on_primary or dev.AssembleOnSecondary():
3993
    rstats = lu.rpc.call_blockdev_find(node, dev)
3994
    msg = rstats.fail_msg
3995
    if msg:
3996
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3997
      result = False
3998
    elif not rstats.payload:
3999
      lu.LogWarning("Can't find disk on node %s", node)
4000
      result = False
4001
    else:
4002
      if ldisk:
4003
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4004
      else:
4005
        result = result and not rstats.payload.is_degraded
4006

    
4007
  if dev.children:
4008
    for child in dev.children:
4009
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4010

    
4011
  return result
4012

    
4013

    
4014
class LUOobCommand(NoHooksLU):
4015
  """Logical unit for OOB handling.
4016

4017
  """
4018
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4020

    
4021
  def ExpandNames(self):
4022
    """Gather locks we need.
4023

4024
    """
4025
    if self.op.node_names:
4026
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4027
      lock_names = self.op.node_names
4028
    else:
4029
      lock_names = locking.ALL_SET
4030

    
4031
    self.needed_locks = {
4032
      locking.LEVEL_NODE: lock_names,
4033
      }
4034

    
4035
  def CheckPrereq(self):
4036
    """Check prerequisites.
4037

4038
    This checks:
4039
     - the node exists in the configuration
4040
     - OOB is supported
4041

4042
    Any errors are signaled by raising errors.OpPrereqError.
4043

4044
    """
4045
    self.nodes = []
4046
    self.master_node = self.cfg.GetMasterNode()
4047

    
4048
    assert self.op.power_delay >= 0.0
4049

    
4050
    if self.op.node_names:
4051
      if (self.op.command in self._SKIP_MASTER and
4052
          self.master_node in self.op.node_names):
4053
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4054
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4055

    
4056
        if master_oob_handler:
4057
          additional_text = ("run '%s %s %s' if you want to operate on the"
4058
                             " master regardless") % (master_oob_handler,
4059
                                                      self.op.command,
4060
                                                      self.master_node)
4061
        else:
4062
          additional_text = "it does not support out-of-band operations"
4063

    
4064
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4065
                                    " allowed for %s; %s") %
4066
                                   (self.master_node, self.op.command,
4067
                                    additional_text), errors.ECODE_INVAL)
4068
    else:
4069
      self.op.node_names = self.cfg.GetNodeList()
4070
      if self.op.command in self._SKIP_MASTER:
4071
        self.op.node_names.remove(self.master_node)
4072

    
4073
    if self.op.command in self._SKIP_MASTER:
4074
      assert self.master_node not in self.op.node_names
4075

    
4076
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4077
      if node is None:
4078
        raise errors.OpPrereqError("Node %s not found" % node_name,
4079
                                   errors.ECODE_NOENT)
4080
      else:
4081
        self.nodes.append(node)
4082

    
4083
      if (not self.op.ignore_status and
4084
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4085
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4086
                                    " not marked offline") % node_name,
4087
                                   errors.ECODE_STATE)
4088

    
4089
  def Exec(self, feedback_fn):
4090
    """Execute OOB and return result if we expect any.
4091

4092
    """
4093
    master_node = self.master_node
4094
    ret = []
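    # Each entry appended to "ret" describes one node as a list of
    # (status, data) tuples: the node name first, then either the command
    # payload, (RS_UNAVAIL, None) if OOB is unsupported, or (RS_NODATA, None)
    # on RPC/payload errors.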
4095

    
4096
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4097
                                              key=lambda node: node.name)):
4098
      node_entry = [(constants.RS_NORMAL, node.name)]
4099
      ret.append(node_entry)
4100

    
4101
      oob_program = _SupportsOob(self.cfg, node)
4102

    
4103
      if not oob_program:
4104
        node_entry.append((constants.RS_UNAVAIL, None))
4105
        continue
4106

    
4107
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4108
                   self.op.command, oob_program, node.name)
4109
      result = self.rpc.call_run_oob(master_node, oob_program,
4110
                                     self.op.command, node.name,
4111
                                     self.op.timeout)
4112

    
4113
      if result.fail_msg:
4114
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4115
                        node.name, result.fail_msg)
4116
        node_entry.append((constants.RS_NODATA, None))
4117
      else:
4118
        try:
4119
          self._CheckPayload(result)
4120
        except errors.OpExecError, err:
4121
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4122
                          node.name, err)
4123
          node_entry.append((constants.RS_NODATA, None))
4124
        else:
4125
          if self.op.command == constants.OOB_HEALTH:
4126
            # For health we should log important events
4127
            for item, status in result.payload:
4128
              if status in [constants.OOB_STATUS_WARNING,
4129
                            constants.OOB_STATUS_CRITICAL]:
4130
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4131
                                item, node.name, status)
4132

    
4133
          if self.op.command == constants.OOB_POWER_ON:
4134
            node.powered = True
4135
          elif self.op.command == constants.OOB_POWER_OFF:
4136
            node.powered = False
4137
          elif self.op.command == constants.OOB_POWER_STATUS:
4138
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4139
            if powered != node.powered:
4140
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4141
                               " match actual power state (%s)"), node.powered,
4142
                              node.name, powered)
4143

    
4144
          # For configuration changing commands we should update the node
4145
          if self.op.command in (constants.OOB_POWER_ON,
4146
                                 constants.OOB_POWER_OFF):
4147
            self.cfg.Update(node, feedback_fn)
4148

    
4149
          node_entry.append((constants.RS_NORMAL, result.payload))
4150

    
4151
          if (self.op.command == constants.OOB_POWER_ON and
4152
              idx < len(self.nodes) - 1):
4153
            time.sleep(self.op.power_delay)
4154

    
4155
    return ret
4156

    
4157
  def _CheckPayload(self, result):
4158
    """Checks if the payload is valid.
4159

4160
    @param result: RPC result
4161
    @raises errors.OpExecError: If payload is not valid
4162

4163
    """
4164
    errs = []
4165
    if self.op.command == constants.OOB_HEALTH:
4166
      if not isinstance(result.payload, list):
4167
        errs.append("command 'health' is expected to return a list but got %s" %
4168
                    type(result.payload))
4169
      else:
4170
        for item, status in result.payload:
4171
          if status not in constants.OOB_STATUSES:
4172
            errs.append("health item '%s' has invalid status '%s'" %
4173
                        (item, status))
4174

    
4175
    if self.op.command == constants.OOB_POWER_STATUS:
4176
      if not isinstance(result.payload, dict):
4177
        errs.append("power-status is expected to return a dict but got %s" %
4178
                    type(result.payload))
4179

    
4180
    if self.op.command in [
4181
        constants.OOB_POWER_ON,
4182
        constants.OOB_POWER_OFF,
4183
        constants.OOB_POWER_CYCLE,
4184
        ]:
4185
      if result.payload is not None:
4186
        errs.append("%s is expected to not return payload but got '%s'" %
4187
                    (self.op.command, result.payload))
4188

    
4189
    if errs:
4190
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4191
                               utils.CommaJoin(errs))
4192

    
4193

    
4194
class _OsQuery(_QueryBase):
4195
  FIELDS = query.OS_FIELDS
4196

    
4197
  def ExpandNames(self, lu):
4198
    # Lock all nodes in shared mode
4199
    # Temporary removal of locks, should be reverted later
4200
    # TODO: reintroduce locks when they are lighter-weight
4201
    lu.needed_locks = {}
4202
    #self.share_locks[locking.LEVEL_NODE] = 1
4203
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4204

    
4205
    # The following variables interact with _QueryBase._GetNames
4206
    if self.names:
4207
      self.wanted = self.names
4208
    else:
4209
      self.wanted = locking.ALL_SET
4210

    
4211
    self.do_locking = self.use_locking
4212

    
4213
  def DeclareLocks(self, lu, level):
4214
    pass
4215

    
4216
  @staticmethod
4217
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
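
  # Illustrative example (editorial addition): with the default field list
  # (none of "hidden", "blacklisted" or "valid" requested) and
  # names=["debian-8"], _BuildFilter returns a nested qlang expression roughly
  # of the form
  #   [OP_AND, <name filter for "debian-8">,
  #            [OP_AND, [OP_NOT, [OP_TRUE, "hidden"]],
  #                     [OP_NOT, [OP_TRUE, "blacklisted"]],
  #                     [OP_TRUE, "valid"]]]
  # The exact shape of the name filter is up to qlang.MakeSimpleFilter, and
  # "debian-8" is only an example OS name.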

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
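
  # Illustrative usage (editorial addition): this LU is normally reached by
  # submitting the corresponding opcode through the job queue, e.g.
  #   opcodes.OpNodeRemove(node_name="node3.example.com")
  # which is what "gnt-node remove node3" does; the node name is an example.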


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
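
  # Editorial note (illustrative): node locks are only acquired when live data
  # (query.NQ_LIVE) is requested together with use_locking; a query for purely
  # configuration-derived fields (e.g. "name", "pinst_cnt") therefore runs
  # lock-free, while free memory/disk fields go through the node_info RPC
  # above. The field names are examples; query.NODE_FIELDS is authoritative.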


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
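
  # Illustrative example (editorial addition): with output_fields
  # ["node", "phys", "vg", "name", "size", "instance"] one returned row could
  # look like
  #   ["node1.example.com", "/dev/sda5", "xenvg", "4e2dc678.disk0", "2048",
  #    "instance1.example.com"]
  # (all values are stringified; the concrete names above are made up).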


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
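
  # Illustrative usage (editorial addition): the generic query LUs above are
  # driven by opcodes along the lines of
  #   opcodes.OpQuery(what=constants.QR_OS, fields=["name", "variants"],
  #                   filter=None)
  #   opcodes.OpQueryFields(what=constants.QR_NODE, fields=None)
  # where the "what" values come from constants.QR_VIA_OP; the field names
  # here are examples only.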


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
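
  # Illustrative example (editorial addition): for LVM physical volumes the
  # only field expected to be modifiable is allocatability, so a typical
  # "changes" dict would be {constants.SF_ALLOCATABLE: False} applied to a
  # volume such as "/dev/sdb1" (device name made up); see
  # constants.MODIFIABLE_STORAGE_FIELDS for the authoritative per-type sets.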


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

    # check connectivity
    result = self.rpc.call_version([self.new_node.name])[self.new_node.name]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpPrereqError("Version mismatch master version %s,"
                                 " node version %s" %
                                 (constants.PROTOCOL_VERSION, result.payload),
                                 errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
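
  # Illustrative usage (editorial addition): node addition is normally
  # triggered by an opcode along the lines of
  #   opcodes.OpNodeAdd(node_name="node4.example.com", readd=False,
  #                     group="default", vm_capable=True)
  # which is what "gnt-node add node4" submits; the node and group names are
  # examples only.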


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]
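  # Editorial note (illustrative): reading the tables above, a node whose
  # (master_candidate, drained, offline) flags are (True, False, False) maps
  # to _ROLE_CANDIDATE, and _R2F[_ROLE_DRAINED] gives back (False, True, False);
  # combinations with more than one flag set are intentionally not listed.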

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
5475

    
5476

    
5477
class LUNodePowercycle(NoHooksLU):
5478
  """Powercycles a node.
5479

5480
  """
5481
  REQ_BGL = False
5482

    
5483
  def CheckArguments(self):
5484
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5485
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5486
      raise errors.OpPrereqError("The node is the master and the force"
5487
                                 " parameter was not set",
5488
                                 errors.ECODE_INVAL)
5489

    
5490
  def ExpandNames(self):
5491
    """Locking for PowercycleNode.
5492

5493
    This is a last-resort option and shouldn't block on other
5494
    jobs. Therefore, we grab no locks.
5495

5496
    """
5497
    self.needed_locks = {}
5498

    
5499
  def Exec(self, feedback_fn):
5500
    """Reboots a node.
5501

5502
    """
5503
    result = self.rpc.call_node_powercycle(self.op.node_name,
5504
                                           self.cfg.GetHypervisorType())
5505
    result.Raise("Failed to schedule the reboot")
5506
    return result.payload
5507

    
5508

    
5509
class LUClusterQuery(NoHooksLU):
5510
  """Query cluster configuration.
5511

5512
  """
5513
  REQ_BGL = False
5514

    
5515
  def ExpandNames(self):
5516
    self.needed_locks = {}
5517

    
5518
  def Exec(self, feedback_fn):
5519
    """Return cluster config.
5520

5521
    """
5522
    cluster = self.cfg.GetClusterInfo()
5523
    os_hvp = {}
5524

    
5525
    # Filter just for enabled hypervisors
5526
    for os_name, hv_dict in cluster.os_hvp.items():
5527
      os_hvp[os_name] = {}
5528
      for hv_name, hv_params in hv_dict.items():
5529
        if hv_name in cluster.enabled_hypervisors:
5530
          os_hvp[os_name][hv_name] = hv_params
5531

    
5532
    # Convert ip_family to ip_version
5533
    primary_ip_version = constants.IP4_VERSION
5534
    if cluster.primary_ip_family == netutils.IP6Address.family:
5535
      primary_ip_version = constants.IP6_VERSION
5536

    
5537
    result = {
5538
      "software_version": constants.RELEASE_VERSION,
5539
      "protocol_version": constants.PROTOCOL_VERSION,
5540
      "config_version": constants.CONFIG_VERSION,
5541
      "os_api_version": max(constants.OS_API_VERSIONS),
5542
      "export_version": constants.EXPORT_VERSION,
5543
      "architecture": runtime.GetArchInfo(),
5544
      "name": cluster.cluster_name,
5545
      "master": cluster.master_node,
5546
      "default_hypervisor": cluster.enabled_hypervisors[0],
5547
      "enabled_hypervisors": cluster.enabled_hypervisors,
5548
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5549
                        for hypervisor_name in cluster.enabled_hypervisors]),
5550
      "os_hvp": os_hvp,
5551
      "beparams": cluster.beparams,
5552
      "osparams": cluster.osparams,
5553
      "nicparams": cluster.nicparams,
5554
      "ndparams": cluster.ndparams,
5555
      "candidate_pool_size": cluster.candidate_pool_size,
5556
      "master_netdev": cluster.master_netdev,
5557
      "volume_group_name": cluster.volume_group_name,
5558
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5559
      "file_storage_dir": cluster.file_storage_dir,
5560
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5561
      "maintain_node_health": cluster.maintain_node_health,
5562
      "ctime": cluster.ctime,
5563
      "mtime": cluster.mtime,
5564
      "uuid": cluster.uuid,
5565
      "tags": list(cluster.GetTags()),
5566
      "uid_pool": cluster.uid_pool,
5567
      "default_iallocator": cluster.default_iallocator,
5568
      "reserved_lvs": cluster.reserved_lvs,
5569
      "primary_ip_version": primary_ip_version,
5570
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5571
      "hidden_os": cluster.hidden_os,
5572
      "blacklisted_os": cluster.blacklisted_os,
5573
      }
5574

    
5575
    return result
5576

    
5577

    
5578
class LUClusterConfigQuery(NoHooksLU):
5579
  """Return configuration values.
5580

5581
  """
5582
  REQ_BGL = False
5583
  _FIELDS_DYNAMIC = utils.FieldSet()
5584
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5585
                                  "watcher_pause", "volume_group_name")
5586

    
5587
  def CheckArguments(self):
5588
    _CheckOutputFields(static=self._FIELDS_STATIC,
5589
                       dynamic=self._FIELDS_DYNAMIC,
5590
                       selected=self.op.output_fields)
5591

    
5592
  def ExpandNames(self):
5593
    self.needed_locks = {}
5594

    
5595
  def Exec(self, feedback_fn):
5596
    """Dump a representation of the cluster config to the standard output.
5597

5598
    """
5599
    values = []
5600
    for field in self.op.output_fields:
5601
      if field == "cluster_name":
5602
        entry = self.cfg.GetClusterName()
5603
      elif field == "master_node":
5604
        entry = self.cfg.GetMasterNode()
5605
      elif field == "drain_flag":
5606
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5607
      elif field == "watcher_pause":
5608
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5609
      elif field == "volume_group_name":
5610
        entry = self.cfg.GetVGName()
5611
      else:
5612
        raise errors.ParameterError(field)
5613
      values.append(entry)
5614
    return values
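  # Illustrative sketch (comment only, not executed): the returned list keeps
  # the order of the requested fields; OpClusterConfigQuery is assumed to be
  # the opcode paired with this LU, and the values shown are made up.
  #
  #   op = opcodes.OpClusterConfigQuery(output_fields=["cluster_name",
  #                                                    "drain_flag"])
  #   # -> ["cluster.example.com", False]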
5615

    
5616

    
5617
class LUInstanceActivateDisks(NoHooksLU):
5618
  """Bring up an instance's disks.
5619

5620
  """
5621
  REQ_BGL = False
5622

    
5623
  def ExpandNames(self):
5624
    self._ExpandAndLockInstance()
5625
    self.needed_locks[locking.LEVEL_NODE] = []
5626
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5627

    
5628
  def DeclareLocks(self, level):
5629
    if level == locking.LEVEL_NODE:
5630
      self._LockInstancesNodes()
5631

    
5632
  def CheckPrereq(self):
5633
    """Check prerequisites.
5634

5635
    This checks that the instance is in the cluster.
5636

5637
    """
5638
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5639
    assert self.instance is not None, \
5640
      "Cannot retrieve locked instance %s" % self.op.instance_name
5641
    _CheckNodeOnline(self, self.instance.primary_node)
5642

    
5643
  def Exec(self, feedback_fn):
5644
    """Activate the disks.
5645

5646
    """
5647
    disks_ok, disks_info = \
5648
              _AssembleInstanceDisks(self, self.instance,
5649
                                     ignore_size=self.op.ignore_size)
5650
    if not disks_ok:
5651
      raise errors.OpExecError("Cannot activate block devices")
5652

    
5653
    return disks_info
5654

    
5655

    
5656
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5657
                           ignore_size=False):
5658
  """Prepare the block devices for an instance.
5659

5660
  This sets up the block devices on all nodes.
5661

5662
  @type lu: L{LogicalUnit}
5663
  @param lu: the logical unit on whose behalf we execute
5664
  @type instance: L{objects.Instance}
5665
  @param instance: the instance for whose disks we assemble
5666
  @type disks: list of L{objects.Disk} or None
5667
  @param disks: which disks to assemble (or all, if None)
5668
  @type ignore_secondaries: boolean
5669
  @param ignore_secondaries: if true, errors on secondary nodes
5670
      won't result in an error return from the function
5671
  @type ignore_size: boolean
5672
  @param ignore_size: if true, the current known size of the disk
5673
      will not be used during the disk activation, useful for cases
5674
      when the size is wrong
5675
  @return: False if the operation failed, otherwise a list of
5676
      (host, instance_visible_name, node_visible_name)
5677
      with the mapping from node devices to instance devices
5678

5679
  """
5680
  device_info = []
5681
  disks_ok = True
5682
  iname = instance.name
5683
  disks = _ExpandCheckDisks(instance, disks)
5684

    
5685
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking has occurred, but we do not eliminate it
5688

    
5689
  # The proper fix would be to wait (with some limits) until the
5690
  # connection has been made and drbd transitions from WFConnection
5691
  # into any other network-connected state (Connected, SyncTarget,
5692
  # SyncSource, etc.)
5693

    
5694
  # 1st pass, assemble on all nodes in secondary mode
5695
  for idx, inst_disk in enumerate(disks):
5696
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5697
      if ignore_size:
5698
        node_disk = node_disk.Copy()
5699
        node_disk.UnsetSize()
5700
      lu.cfg.SetDiskID(node_disk, node)
5701
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5702
      msg = result.fail_msg
5703
      if msg:
5704
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5705
                           " (is_primary=False, pass=1): %s",
5706
                           inst_disk.iv_name, node, msg)
5707
        if not ignore_secondaries:
5708
          disks_ok = False
5709

    
5710
  # FIXME: race condition on drbd migration to primary
5711

    
5712
  # 2nd pass, do only the primary node
5713
  for idx, inst_disk in enumerate(disks):
5714
    dev_path = None
5715

    
5716
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5717
      if node != instance.primary_node:
5718
        continue
5719
      if ignore_size:
5720
        node_disk = node_disk.Copy()
5721
        node_disk.UnsetSize()
5722
      lu.cfg.SetDiskID(node_disk, node)
5723
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5724
      msg = result.fail_msg
5725
      if msg:
5726
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5727
                           " (is_primary=True, pass=2): %s",
5728
                           inst_disk.iv_name, node, msg)
5729
        disks_ok = False
5730
      else:
5731
        dev_path = result.payload
5732

    
5733
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5734

    
5735
  # leave the disks configured for the primary node
5736
  # this is a workaround that would be fixed better by
5737
  # improving the logical/physical id handling
5738
  for disk in disks:
5739
    lu.cfg.SetDiskID(disk, instance.primary_node)
5740

    
5741
  return disks_ok, device_info
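# Illustrative sketch (comment only, not executed): the typical call pattern
# for _AssembleInstanceDisks, mirroring LUInstanceActivateDisks.Exec above;
# each entry of the returned list maps a node device to an instance device.
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in disks_info:
#     feedback_fn("  %s: %s mapped to %s" % (node, iv_name, dev_path))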
5742

    
5743

    
5744
def _StartInstanceDisks(lu, instance, force):
5745
  """Start the disks of an instance.
5746

5747
  """
5748
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5749
                                           ignore_secondaries=force)
5750
  if not disks_ok:
5751
    _ShutdownInstanceDisks(lu, instance)
5752
    if force is not None and not force:
5753
      lu.proc.LogWarning("", hint="If the message above refers to a"
5754
                         " secondary node,"
5755
                         " you can retry the operation using '--force'.")
5756
    raise errors.OpExecError("Disk consistency error")
5757

    
5758

    
5759
class LUInstanceDeactivateDisks(NoHooksLU):
5760
  """Shutdown an instance's disks.
5761

5762
  """
5763
  REQ_BGL = False
5764

    
5765
  def ExpandNames(self):
5766
    self._ExpandAndLockInstance()
5767
    self.needed_locks[locking.LEVEL_NODE] = []
5768
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5769

    
5770
  def DeclareLocks(self, level):
5771
    if level == locking.LEVEL_NODE:
5772
      self._LockInstancesNodes()
5773

    
5774
  def CheckPrereq(self):
5775
    """Check prerequisites.
5776

5777
    This checks that the instance is in the cluster.
5778

5779
    """
5780
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5781
    assert self.instance is not None, \
5782
      "Cannot retrieve locked instance %s" % self.op.instance_name
5783

    
5784
  def Exec(self, feedback_fn):
5785
    """Deactivate the disks
5786

5787
    """
5788
    instance = self.instance
5789
    if self.op.force:
5790
      _ShutdownInstanceDisks(self, instance)
5791
    else:
5792
      _SafeShutdownInstanceDisks(self, instance)
5793

    
5794

    
5795
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5796
  """Shutdown block devices of an instance.
5797

5798
  This function checks if an instance is running, before calling
5799
  _ShutdownInstanceDisks.
5800

5801
  """
5802
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5803
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5804

    
5805

    
5806
def _ExpandCheckDisks(instance, disks):
5807
  """Return the instance disks selected by the disks list
5808

5809
  @type disks: list of L{objects.Disk} or None
5810
  @param disks: selected disks
5811
  @rtype: list of L{objects.Disk}
5812
  @return: selected instance disks to act on
5813

5814
  """
5815
  if disks is None:
5816
    return instance.disks
5817
  else:
5818
    if not set(disks).issubset(instance.disks):
5819
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5820
                                   " target instance")
5821
    return disks
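# Illustrative sketch (comment only, not executed): the three cases handled
# by _ExpandCheckDisks.
#
#   _ExpandCheckDisks(instance, None)                # -> instance.disks
#   _ExpandCheckDisks(instance, instance.disks[:1])  # -> the given subset
#   _ExpandCheckDisks(instance, [foreign_disk])      # -> ProgrammerError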
5822

    
5823

    
5824
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5825
  """Shutdown block devices of an instance.
5826

5827
  This does the shutdown on all nodes of the instance.
5828

5829
  Errors on the primary node are ignored only if C{ignore_primary} is
  true; errors on offline secondary nodes are always ignored.
5831

5832
  """
5833
  all_result = True
5834
  disks = _ExpandCheckDisks(instance, disks)
5835

    
5836
  for disk in disks:
5837
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5838
      lu.cfg.SetDiskID(top_disk, node)
5839
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5840
      msg = result.fail_msg
5841
      if msg:
5842
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5843
                      disk.iv_name, node, msg)
5844
        if ((node == instance.primary_node and not ignore_primary) or
5845
            (node != instance.primary_node and not result.offline)):
5846
          all_result = False
5847
  return all_result
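# Illustrative sketch (comment only, not executed): tolerating shutdown
# errors on the primary node, e.g. when it is known to be unreachable.
#
#   if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
#     lu.LogWarning("Some block devices of instance %s could not be"
#                   " shut down", instance.name)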
5848

    
5849

    
5850
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5851
  """Checks if a node has enough free memory.
5852

5853
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5857

5858
  @type lu: C{LogicalUnit}
5859
  @param lu: a logical unit from which we get configuration data
5860
  @type node: C{str}
5861
  @param node: the node to check
5862
  @type reason: C{str}
5863
  @param reason: string to use in the error message
5864
  @type requested: C{int}
5865
  @param requested: the amount of memory in MiB to check for
5866
  @type hypervisor_name: C{str}
5867
  @param hypervisor_name: the hypervisor to ask for memory stats
5868
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5869
      we cannot check the node
5870

5871
  """
5872
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5873
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5874
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5875
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5876
  if not isinstance(free_mem, int):
5877
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5878
                               " was '%s'" % (node, free_mem),
5879
                               errors.ECODE_ENVIRON)
5880
  if requested > free_mem:
5881
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5882
                               " needed %s MiB, available %s MiB" %
5883
                               (node, reason, requested, free_mem),
5884
                               errors.ECODE_NORES)
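# Illustrative sketch (comment only, not executed): the check performed by
# LUInstanceStartup.CheckPrereq above, with a made-up memory size in MiB.
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        2048, instance.hypervisor)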
5885

    
5886

    
5887
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5888
  """Checks if nodes have enough free disk space in the all VGs.
5889

5890
  This function check if all given nodes have the needed amount of
5891
  free disk. In case any node has less disk or we cannot get the
5892
  information from the node, this function raise an OpPrereqError
5893
  exception.
5894

5895
  @type lu: C{LogicalUnit}
5896
  @param lu: a logical unit from which we get configuration data
5897
  @type nodenames: C{list}
5898
  @param nodenames: the list of node names to check
5899
  @type req_sizes: C{dict}
5900
  @param req_sizes: the hash of vg and corresponding amount of disk in
5901
      MiB to check for
5902
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5903
      or we cannot check the node
5904

5905
  """
5906
  for vg, req_size in req_sizes.items():
5907
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
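# Illustrative sketch (comment only, not executed): req_sizes maps a volume
# group name to the total amount of space (in MiB) about to be requested
# from it; the VG name below is only an example.
#
#   req_sizes = {"xenvg": 2 * 10240}    # e.g. two new 10 GiB LVs
#   _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)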
5908

    
5909

    
5910
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5911
  """Checks if nodes have enough free disk space in the specified VG.
5912

5913
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5917

5918
  @type lu: C{LogicalUnit}
5919
  @param lu: a logical unit from which we get configuration data
5920
  @type nodenames: C{list}
5921
  @param nodenames: the list of node names to check
5922
  @type vg: C{str}
5923
  @param vg: the volume group to check
5924
  @type requested: C{int}
5925
  @param requested: the amount of disk in MiB to check for
5926
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5927
      or we cannot check the node
5928

5929
  """
5930
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5931
  for node in nodenames:
5932
    info = nodeinfo[node]
5933
    info.Raise("Cannot get current information from node %s" % node,
5934
               prereq=True, ecode=errors.ECODE_ENVIRON)
5935
    vg_free = info.payload.get("vg_free", None)
5936
    if not isinstance(vg_free, int):
5937
      raise errors.OpPrereqError("Can't compute free disk space on node"
5938
                                 " %s for vg %s, result was '%s'" %
5939
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5940
    if requested > vg_free:
5941
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5942
                                 " vg %s: required %d MiB, available %d MiB" %
5943
                                 (node, vg, requested, vg_free),
5944
                                 errors.ECODE_NORES)
5945

    
5946

    
5947
class LUInstanceStartup(LogicalUnit):
5948
  """Starts an instance.
5949

5950
  """
5951
  HPATH = "instance-start"
5952
  HTYPE = constants.HTYPE_INSTANCE
5953
  REQ_BGL = False
5954

    
5955
  def CheckArguments(self):
5956
    # extra beparams
5957
    if self.op.beparams:
5958
      # fill the beparams dict
5959
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5960

    
5961
  def ExpandNames(self):
5962
    self._ExpandAndLockInstance()
5963

    
5964
  def BuildHooksEnv(self):
5965
    """Build hooks env.
5966

5967
    This runs on master, primary and secondary nodes of the instance.
5968

5969
    """
5970
    env = {
5971
      "FORCE": self.op.force,
5972
      }
5973

    
5974
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5975

    
5976
    return env
5977

    
5978
  def BuildHooksNodes(self):
5979
    """Build hooks nodes.
5980

5981
    """
5982
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5983
    return (nl, nl)
5984

    
5985
  def CheckPrereq(self):
5986
    """Check prerequisites.
5987

5988
    This checks that the instance is in the cluster.
5989

5990
    """
5991
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5992
    assert self.instance is not None, \
5993
      "Cannot retrieve locked instance %s" % self.op.instance_name
5994

    
5995
    # extra hvparams
5996
    if self.op.hvparams:
5997
      # check hypervisor parameter syntax (locally)
5998
      cluster = self.cfg.GetClusterInfo()
5999
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6000
      filled_hvp = cluster.FillHV(instance)
6001
      filled_hvp.update(self.op.hvparams)
6002
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6003
      hv_type.CheckParameterSyntax(filled_hvp)
6004
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6005

    
6006
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6007

    
6008
    if self.primary_offline and self.op.ignore_offline_nodes:
6009
      self.proc.LogWarning("Ignoring offline primary node")
6010

    
6011
      if self.op.hvparams or self.op.beparams:
6012
        self.proc.LogWarning("Overridden parameters are ignored")
6013
    else:
6014
      _CheckNodeOnline(self, instance.primary_node)
6015

    
6016
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6017

    
6018
      # check bridges existence
6019
      _CheckInstanceBridgesExist(self, instance)
6020

    
6021
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6022
                                                instance.name,
6023
                                                instance.hypervisor)
6024
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6025
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6026
      if not remote_info.payload: # not running already
6027
        _CheckNodeFreeMemory(self, instance.primary_node,
6028
                             "starting instance %s" % instance.name,
6029
                             bep[constants.BE_MEMORY], instance.hypervisor)
6030

    
6031
  def Exec(self, feedback_fn):
6032
    """Start the instance.
6033

6034
    """
6035
    instance = self.instance
6036
    force = self.op.force
6037

    
6038
    if not self.op.no_remember:
6039
      self.cfg.MarkInstanceUp(instance.name)
6040

    
6041
    if self.primary_offline:
6042
      assert self.op.ignore_offline_nodes
6043
      self.proc.LogInfo("Primary node offline, marked instance as started")
6044
    else:
6045
      node_current = instance.primary_node
6046

    
6047
      _StartInstanceDisks(self, instance, force)
6048

    
6049
      result = self.rpc.call_instance_start(node_current, instance,
6050
                                            self.op.hvparams, self.op.beparams,
6051
                                            self.op.startup_paused)
6052
      msg = result.fail_msg
6053
      if msg:
6054
        _ShutdownInstanceDisks(self, instance)
6055
        raise errors.OpExecError("Could not start instance: %s" % msg)
6056

    
6057

    
6058
class LUInstanceReboot(LogicalUnit):
6059
  """Reboot an instance.
6060

6061
  """
6062
  HPATH = "instance-reboot"
6063
  HTYPE = constants.HTYPE_INSTANCE
6064
  REQ_BGL = False
6065

    
6066
  def ExpandNames(self):
6067
    self._ExpandAndLockInstance()
6068

    
6069
  def BuildHooksEnv(self):
6070
    """Build hooks env.
6071

6072
    This runs on master, primary and secondary nodes of the instance.
6073

6074
    """
6075
    env = {
6076
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6077
      "REBOOT_TYPE": self.op.reboot_type,
6078
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6079
      }
6080

    
6081
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6082

    
6083
    return env
6084

    
6085
  def BuildHooksNodes(self):
6086
    """Build hooks nodes.
6087

6088
    """
6089
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6090
    return (nl, nl)
6091

    
6092
  def CheckPrereq(self):
6093
    """Check prerequisites.
6094

6095
    This checks that the instance is in the cluster.
6096

6097
    """
6098
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6099
    assert self.instance is not None, \
6100
      "Cannot retrieve locked instance %s" % self.op.instance_name
6101

    
6102
    _CheckNodeOnline(self, instance.primary_node)
6103

    
6104
    # check bridges existence
6105
    _CheckInstanceBridgesExist(self, instance)
6106

    
6107
  def Exec(self, feedback_fn):
6108
    """Reboot the instance.
6109

6110
    """
6111
    instance = self.instance
6112
    ignore_secondaries = self.op.ignore_secondaries
6113
    reboot_type = self.op.reboot_type
6114

    
6115
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6116
                                              instance.name,
6117
                                              instance.hypervisor)
6118
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6119
    instance_running = bool(remote_info.payload)
6120

    
6121
    node_current = instance.primary_node
6122

    
6123
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6124
                                            constants.INSTANCE_REBOOT_HARD]:
6125
      for disk in instance.disks:
6126
        self.cfg.SetDiskID(disk, node_current)
6127
      result = self.rpc.call_instance_reboot(node_current, instance,
6128
                                             reboot_type,
6129
                                             self.op.shutdown_timeout)
6130
      result.Raise("Could not reboot instance")
6131
    else:
6132
      if instance_running:
6133
        result = self.rpc.call_instance_shutdown(node_current, instance,
6134
                                                 self.op.shutdown_timeout)
6135
        result.Raise("Could not shutdown instance for full reboot")
6136
        _ShutdownInstanceDisks(self, instance)
6137
      else:
6138
        self.LogInfo("Instance %s was already stopped, starting now",
6139
                     instance.name)
6140
      _StartInstanceDisks(self, instance, ignore_secondaries)
6141
      result = self.rpc.call_instance_start(node_current, instance,
6142
                                            None, None, False)
6143
      msg = result.fail_msg
6144
      if msg:
6145
        _ShutdownInstanceDisks(self, instance)
6146
        raise errors.OpExecError("Could not start instance for"
6147
                                 " full reboot: %s" % msg)
6148

    
6149
    self.cfg.MarkInstanceUp(instance.name)
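  # Illustrative sketch (comment only, not executed): a soft/hard reboot is
  # delegated to the hypervisor, anything else falls back to the full
  # shutdown-and-start path above; OpInstanceReboot is assumed to be the
  # opcode paired with this LU.
  #
  #   op = opcodes.OpInstanceReboot(instance_name="inst1.example.com",
  #                                 reboot_type=constants.INSTANCE_REBOOT_HARD,
  #                                 ignore_secondaries=False,
  #                                 shutdown_timeout=120)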
6150

    
6151

    
6152
class LUInstanceShutdown(LogicalUnit):
6153
  """Shutdown an instance.
6154

6155
  """
6156
  HPATH = "instance-stop"
6157
  HTYPE = constants.HTYPE_INSTANCE
6158
  REQ_BGL = False
6159

    
6160
  def ExpandNames(self):
6161
    self._ExpandAndLockInstance()
6162

    
6163
  def BuildHooksEnv(self):
6164
    """Build hooks env.
6165

6166
    This runs on master, primary and secondary nodes of the instance.
6167

6168
    """
6169
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6170
    env["TIMEOUT"] = self.op.timeout
6171
    return env
6172

    
6173
  def BuildHooksNodes(self):
6174
    """Build hooks nodes.
6175

6176
    """
6177
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6178
    return (nl, nl)
6179

    
6180
  def CheckPrereq(self):
6181
    """Check prerequisites.
6182

6183
    This checks that the instance is in the cluster.
6184

6185
    """
6186
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6187
    assert self.instance is not None, \
6188
      "Cannot retrieve locked instance %s" % self.op.instance_name
6189

    
6190
    self.primary_offline = \
6191
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6192

    
6193
    if self.primary_offline and self.op.ignore_offline_nodes:
6194
      self.proc.LogWarning("Ignoring offline primary node")
6195
    else:
6196
      _CheckNodeOnline(self, self.instance.primary_node)
6197

    
6198
  def Exec(self, feedback_fn):
6199
    """Shutdown the instance.
6200

6201
    """
6202
    instance = self.instance
6203
    node_current = instance.primary_node
6204
    timeout = self.op.timeout
6205

    
6206
    if not self.op.no_remember:
6207
      self.cfg.MarkInstanceDown(instance.name)
6208

    
6209
    if self.primary_offline:
6210
      assert self.op.ignore_offline_nodes
6211
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6212
    else:
6213
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6214
      msg = result.fail_msg
6215
      if msg:
6216
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6217

    
6218
      _ShutdownInstanceDisks(self, instance)
6219

    
6220

    
6221
class LUInstanceReinstall(LogicalUnit):
6222
  """Reinstall an instance.
6223

6224
  """
6225
  HPATH = "instance-reinstall"
6226
  HTYPE = constants.HTYPE_INSTANCE
6227
  REQ_BGL = False
6228

    
6229
  def ExpandNames(self):
6230
    self._ExpandAndLockInstance()
6231

    
6232
  def BuildHooksEnv(self):
6233
    """Build hooks env.
6234

6235
    This runs on master, primary and secondary nodes of the instance.
6236

6237
    """
6238
    return _BuildInstanceHookEnvByObject(self, self.instance)
6239

    
6240
  def BuildHooksNodes(self):
6241
    """Build hooks nodes.
6242

6243
    """
6244
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6245
    return (nl, nl)
6246

    
6247
  def CheckPrereq(self):
6248
    """Check prerequisites.
6249

6250
    This checks that the instance is in the cluster and is not running.
6251

6252
    """
6253
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6254
    assert instance is not None, \
6255
      "Cannot retrieve locked instance %s" % self.op.instance_name
6256
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6257
                     " offline, cannot reinstall")
6258
    for node in instance.secondary_nodes:
6259
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6260
                       " cannot reinstall")
6261

    
6262
    if instance.disk_template == constants.DT_DISKLESS:
6263
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6264
                                 self.op.instance_name,
6265
                                 errors.ECODE_INVAL)
6266
    _CheckInstanceDown(self, instance, "cannot reinstall")
6267

    
6268
    if self.op.os_type is not None:
6269
      # OS verification
6270
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6271
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6272
      instance_os = self.op.os_type
6273
    else:
6274
      instance_os = instance.os
6275

    
6276
    nodelist = list(instance.all_nodes)
6277

    
6278
    if self.op.osparams:
6279
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6280
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6281
      self.os_inst = i_osdict # the new dict (without defaults)
6282
    else:
6283
      self.os_inst = None
6284

    
6285
    self.instance = instance
6286

    
6287
  def Exec(self, feedback_fn):
6288
    """Reinstall the instance.
6289

6290
    """
6291
    inst = self.instance
6292

    
6293
    if self.op.os_type is not None:
6294
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6295
      inst.os = self.op.os_type
6296
      # Write to configuration
6297
      self.cfg.Update(inst, feedback_fn)
6298

    
6299
    _StartInstanceDisks(self, inst, None)
6300
    try:
6301
      feedback_fn("Running the instance OS create scripts...")
6302
      # FIXME: pass debug option from opcode to backend
6303
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6304
                                             self.op.debug_level,
6305
                                             osparams=self.os_inst)
6306
      result.Raise("Could not install OS for instance %s on node %s" %
6307
                   (inst.name, inst.primary_node))
6308
    finally:
6309
      _ShutdownInstanceDisks(self, inst)
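  # Illustrative sketch (comment only, not executed): reinstalling with a
  # different OS and overridden OS parameters, both handled in CheckPrereq
  # above; OpInstanceReinstall is assumed to be the opcode paired with this
  # LU and the OS name is only an example.
  #
  #   op = opcodes.OpInstanceReinstall(instance_name="inst1.example.com",
  #                                    os_type="debootstrap+default",
  #                                    osparams={})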
6310

    
6311

    
6312
class LUInstanceRecreateDisks(LogicalUnit):
6313
  """Recreate an instance's missing disks.
6314

6315
  """
6316
  HPATH = "instance-recreate-disks"
6317
  HTYPE = constants.HTYPE_INSTANCE
6318
  REQ_BGL = False
6319

    
6320
  def CheckArguments(self):
6321
    # normalise the disk list
6322
    self.op.disks = sorted(frozenset(self.op.disks))
6323

    
6324
  def ExpandNames(self):
6325
    self._ExpandAndLockInstance()
6326
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6327
    if self.op.nodes:
6328
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6329
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6330
    else:
6331
      self.needed_locks[locking.LEVEL_NODE] = []
6332

    
6333
  def DeclareLocks(self, level):
6334
    if level == locking.LEVEL_NODE:
6335
      # if we replace the nodes, we only need to lock the old primary,
6336
      # otherwise we need to lock all nodes for disk re-creation
6337
      primary_only = bool(self.op.nodes)
6338
      self._LockInstancesNodes(primary_only=primary_only)
6339

    
6340
  def BuildHooksEnv(self):
6341
    """Build hooks env.
6342

6343
    This runs on master, primary and secondary nodes of the instance.
6344

6345
    """
6346
    return _BuildInstanceHookEnvByObject(self, self.instance)
6347

    
6348
  def BuildHooksNodes(self):
6349
    """Build hooks nodes.
6350

6351
    """
6352
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6353
    return (nl, nl)
6354

    
6355
  def CheckPrereq(self):
6356
    """Check prerequisites.
6357

6358
    This checks that the instance is in the cluster and is not running.
6359

6360
    """
6361
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6362
    assert instance is not None, \
6363
      "Cannot retrieve locked instance %s" % self.op.instance_name
6364
    if self.op.nodes:
6365
      if len(self.op.nodes) != len(instance.all_nodes):
6366
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6367
                                   " %d replacement nodes were specified" %
6368
                                   (instance.name, len(instance.all_nodes),
6369
                                    len(self.op.nodes)),
6370
                                   errors.ECODE_INVAL)
6371
      assert instance.disk_template != constants.DT_DRBD8 or \
6372
          len(self.op.nodes) == 2
6373
      assert instance.disk_template != constants.DT_PLAIN or \
6374
          len(self.op.nodes) == 1
6375
      primary_node = self.op.nodes[0]
6376
    else:
6377
      primary_node = instance.primary_node
6378
    _CheckNodeOnline(self, primary_node)
6379

    
6380
    if instance.disk_template == constants.DT_DISKLESS:
6381
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6382
                                 self.op.instance_name, errors.ECODE_INVAL)
6383
    # if we replace nodes *and* the old primary is offline, we don't
6384
    # check
6385
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6386
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6387
    if not (self.op.nodes and old_pnode.offline):
6388
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6389

    
6390
    if not self.op.disks:
6391
      self.op.disks = range(len(instance.disks))
6392
    else:
6393
      for idx in self.op.disks:
6394
        if idx >= len(instance.disks):
6395
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6396
                                     errors.ECODE_INVAL)
6397
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6398
      raise errors.OpPrereqError("Can't recreate disks partially and"
6399
                                 " change the nodes at the same time",
6400
                                 errors.ECODE_INVAL)
6401
    self.instance = instance
6402

    
6403
  def Exec(self, feedback_fn):
6404
    """Recreate the disks.
6405

6406
    """
6407
    instance = self.instance
6408

    
6409
    to_skip = []
6410
    mods = [] # keeps track of needed logical_id changes
6411

    
6412
    for idx, disk in enumerate(instance.disks):
6413
      if idx not in self.op.disks: # disk idx has not been passed in
6414
        to_skip.append(idx)
6415
        continue
6416
      # update secondaries for disks, if needed
6417
      if self.op.nodes:
6418
        if disk.dev_type == constants.LD_DRBD8:
6419
          # need to update the nodes and minors
6420
          assert len(self.op.nodes) == 2
6421
          assert len(disk.logical_id) == 6 # otherwise disk internals
6422
                                           # have changed
6423
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6424
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6425
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6426
                    new_minors[0], new_minors[1], old_secret)
6427
          assert len(disk.logical_id) == len(new_id)
6428
          mods.append((idx, new_id))
6429

    
6430
    # now that we have passed all asserts above, we can apply the mods
6431
    # in a single run (to avoid partial changes)
6432
    for idx, new_id in mods:
6433
      instance.disks[idx].logical_id = new_id
6434

    
6435
    # change primary node, if needed
6436
    if self.op.nodes:
6437
      instance.primary_node = self.op.nodes[0]
6438
      self.LogWarning("Changing the instance's nodes, you will have to"
6439
                      " remove any disks left on the older nodes manually")
6440

    
6441
    if self.op.nodes:
6442
      self.cfg.Update(instance, feedback_fn)
6443

    
6444
    _CreateDisks(self, instance, to_skip=to_skip)
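  # Illustrative sketch (comment only, not executed): shape of the DRBD8
  # logical_id rewritten above when new nodes are given; all values are
  # made up.
  #
  #   (node_a, node_b, port, minor_a, minor_b, secret) = \
  #     ("node1.example.com", "node2.example.com", 11000, 0, 1, "a1b2c3...")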
6445

    
6446

    
6447
class LUInstanceRename(LogicalUnit):
6448
  """Rename an instance.
6449

6450
  """
6451
  HPATH = "instance-rename"
6452
  HTYPE = constants.HTYPE_INSTANCE
6453

    
6454
  def CheckArguments(self):
6455
    """Check arguments.
6456

6457
    """
6458
    if self.op.ip_check and not self.op.name_check:
6459
      # TODO: make the ip check more flexible and not depend on the name check
6460
      raise errors.OpPrereqError("IP address check requires a name check",
6461
                                 errors.ECODE_INVAL)
6462

    
6463
  def BuildHooksEnv(self):
6464
    """Build hooks env.
6465

6466
    This runs on master, primary and secondary nodes of the instance.
6467

6468
    """
6469
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6470
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6471
    return env
6472

    
6473
  def BuildHooksNodes(self):
6474
    """Build hooks nodes.
6475

6476
    """
6477
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6478
    return (nl, nl)
6479

    
6480
  def CheckPrereq(self):
6481
    """Check prerequisites.
6482

6483
    This checks that the instance is in the cluster and is not running.
6484

6485
    """
6486
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6487
                                                self.op.instance_name)
6488
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6489
    assert instance is not None
6490
    _CheckNodeOnline(self, instance.primary_node)
6491
    _CheckInstanceDown(self, instance, "cannot rename")
6492
    self.instance = instance
6493

    
6494
    new_name = self.op.new_name
6495
    if self.op.name_check:
6496
      hostname = netutils.GetHostname(name=new_name)
6497
      if hostname.name != new_name:
6498
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6499
                     hostname.name)
6500
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6501
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6502
                                    " same as given hostname '%s'") %
6503
                                    (hostname.name, self.op.new_name),
6504
                                    errors.ECODE_INVAL)
6505
      new_name = self.op.new_name = hostname.name
6506
      if (self.op.ip_check and
6507
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6508
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6509
                                   (hostname.ip, new_name),
6510
                                   errors.ECODE_NOTUNIQUE)
6511

    
6512
    instance_list = self.cfg.GetInstanceList()
6513
    if new_name in instance_list and new_name != instance.name:
6514
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6515
                                 new_name, errors.ECODE_EXISTS)
6516

    
6517
  def Exec(self, feedback_fn):
6518
    """Rename the instance.
6519

6520
    """
6521
    inst = self.instance
6522
    old_name = inst.name
6523

    
6524
    rename_file_storage = False
6525
    if (inst.disk_template in constants.DTS_FILEBASED and
6526
        self.op.new_name != inst.name):
6527
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6528
      rename_file_storage = True
6529

    
6530
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6531
    # Change the instance lock. This is definitely safe while we hold the BGL.
6532
    # Otherwise the new lock would have to be added in acquired mode.
6533
    assert self.REQ_BGL
6534
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6535
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6536

    
6537
    # re-read the instance from the configuration after rename
6538
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6539

    
6540
    if rename_file_storage:
6541
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6542
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6543
                                                     old_file_storage_dir,
6544
                                                     new_file_storage_dir)
6545
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6546
                   " (but the instance has been renamed in Ganeti)" %
6547
                   (inst.primary_node, old_file_storage_dir,
6548
                    new_file_storage_dir))
6549

    
6550
    _StartInstanceDisks(self, inst, None)
6551
    try:
6552
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6553
                                                 old_name, self.op.debug_level)
6554
      msg = result.fail_msg
6555
      if msg:
6556
        msg = ("Could not run OS rename script for instance %s on node %s"
6557
               " (but the instance has been renamed in Ganeti): %s" %
6558
               (inst.name, inst.primary_node, msg))
6559
        self.proc.LogWarning(msg)
6560
    finally:
6561
      _ShutdownInstanceDisks(self, inst)
6562

    
6563
    return inst.name
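  # Illustrative sketch (comment only, not executed): renaming with name and
  # IP checks enabled (ip_check requires name_check, see CheckArguments);
  # OpInstanceRename is assumed to be the opcode paired with this LU.
  #
  #   op = opcodes.OpInstanceRename(instance_name="old.example.com",
  #                                 new_name="new.example.com",
  #                                 name_check=True, ip_check=True)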
6564

    
6565

    
6566
class LUInstanceRemove(LogicalUnit):
6567
  """Remove an instance.
6568

6569
  """
6570
  HPATH = "instance-remove"
6571
  HTYPE = constants.HTYPE_INSTANCE
6572
  REQ_BGL = False
6573

    
6574
  def ExpandNames(self):
6575
    self._ExpandAndLockInstance()
6576
    self.needed_locks[locking.LEVEL_NODE] = []
6577
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6578

    
6579
  def DeclareLocks(self, level):
6580
    if level == locking.LEVEL_NODE:
6581
      self._LockInstancesNodes()
6582

    
6583
  def BuildHooksEnv(self):
6584
    """Build hooks env.
6585

6586
    This runs on master, primary and secondary nodes of the instance.
6587

6588
    """
6589
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6590
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6591
    return env
6592

    
6593
  def BuildHooksNodes(self):
6594
    """Build hooks nodes.
6595

6596
    """
6597
    nl = [self.cfg.GetMasterNode()]
6598
    nl_post = list(self.instance.all_nodes) + nl
6599
    return (nl, nl_post)
6600

    
6601
  def CheckPrereq(self):
6602
    """Check prerequisites.
6603

6604
    This checks that the instance is in the cluster.
6605

6606
    """
6607
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6608
    assert self.instance is not None, \
6609
      "Cannot retrieve locked instance %s" % self.op.instance_name
6610

    
6611
  def Exec(self, feedback_fn):
6612
    """Remove the instance.
6613

6614
    """
6615
    instance = self.instance
6616
    logging.info("Shutting down instance %s on node %s",
6617
                 instance.name, instance.primary_node)
6618

    
6619
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6620
                                             self.op.shutdown_timeout)
6621
    msg = result.fail_msg
6622
    if msg:
6623
      if self.op.ignore_failures:
6624
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6625
      else:
6626
        raise errors.OpExecError("Could not shutdown instance %s on"
6627
                                 " node %s: %s" %
6628
                                 (instance.name, instance.primary_node, msg))
6629

    
6630
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6631

    
6632

    
6633
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6634
  """Utility function to remove an instance.
6635

6636
  """
6637
  logging.info("Removing block devices for instance %s", instance.name)
6638

    
6639
  if not _RemoveDisks(lu, instance):
6640
    if not ignore_failures:
6641
      raise errors.OpExecError("Can't remove instance's disks")
6642
    feedback_fn("Warning: can't remove instance's disks")
6643

    
6644
  logging.info("Removing instance %s out of cluster config", instance.name)
6645

    
6646
  lu.cfg.RemoveInstance(instance.name)
6647

    
6648
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6649
    "Instance lock removal conflict"
6650

    
6651
  # Remove lock for the instance
6652
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6653

    
6654

    
6655
class LUInstanceQuery(NoHooksLU):
6656
  """Logical unit for querying instances.
6657

6658
  """
6659
  # pylint: disable=W0142
6660
  REQ_BGL = False
6661

    
6662
  def CheckArguments(self):
6663
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6664
                             self.op.output_fields, self.op.use_locking)
6665

    
6666
  def ExpandNames(self):
6667
    self.iq.ExpandNames(self)
6668

    
6669
  def DeclareLocks(self, level):
6670
    self.iq.DeclareLocks(self, level)
6671

    
6672
  def Exec(self, feedback_fn):
6673
    return self.iq.OldStyleQuery(self)
6674

    
6675

    
6676
class LUInstanceFailover(LogicalUnit):
6677
  """Failover an instance.
6678

6679
  """
6680
  HPATH = "instance-failover"
6681
  HTYPE = constants.HTYPE_INSTANCE
6682
  REQ_BGL = False
6683

    
6684
  def CheckArguments(self):
6685
    """Check the arguments.
6686

6687
    """
6688
    self.iallocator = getattr(self.op, "iallocator", None)
6689
    self.target_node = getattr(self.op, "target_node", None)
6690

    
6691
  def ExpandNames(self):
6692
    self._ExpandAndLockInstance()
6693

    
6694
    if self.op.target_node is not None:
6695
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6696

    
6697
    self.needed_locks[locking.LEVEL_NODE] = []
6698
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6699

    
6700
    ignore_consistency = self.op.ignore_consistency
6701
    shutdown_timeout = self.op.shutdown_timeout
6702
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6703
                                       cleanup=False,
6704
                                       failover=True,
6705
                                       ignore_consistency=ignore_consistency,
6706
                                       shutdown_timeout=shutdown_timeout)
6707
    self.tasklets = [self._migrater]
6708

    
6709
  def DeclareLocks(self, level):
6710
    if level == locking.LEVEL_NODE:
6711
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6712
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6713
        if self.op.target_node is None:
6714
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6715
        else:
6716
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6717
                                                   self.op.target_node]
6718
        del self.recalculate_locks[locking.LEVEL_NODE]
6719
      else:
6720
        self._LockInstancesNodes()
6721

    
6722
  def BuildHooksEnv(self):
6723
    """Build hooks env.
6724

6725
    This runs on master, primary and secondary nodes of the instance.
6726

6727
    """
6728
    instance = self._migrater.instance
6729
    source_node = instance.primary_node
6730
    target_node = self.op.target_node
6731
    env = {
6732
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6733
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6734
      "OLD_PRIMARY": source_node,
6735
      "NEW_PRIMARY": target_node,
6736
      }
6737

    
6738
    if instance.disk_template in constants.DTS_INT_MIRROR:
6739
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6740
      env["NEW_SECONDARY"] = source_node
6741
    else:
6742
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6743

    
6744
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6745

    
6746
    return env
6747

    
6748
  def BuildHooksNodes(self):
6749
    """Build hooks nodes.
6750

6751
    """
6752
    instance = self._migrater.instance
6753
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6754
    return (nl, nl + [instance.primary_node])
6755

    
6756

    
6757
class LUInstanceMigrate(LogicalUnit):
6758
  """Migrate an instance.
6759

6760
  This is migration without shutting down, compared to the failover,
6761
  which is done with shutdown.
6762

6763
  """
6764
  HPATH = "instance-migrate"
6765
  HTYPE = constants.HTYPE_INSTANCE
6766
  REQ_BGL = False
6767

    
6768
  def ExpandNames(self):
6769
    self._ExpandAndLockInstance()
6770

    
6771
    if self.op.target_node is not None:
6772
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6773

    
6774
    self.needed_locks[locking.LEVEL_NODE] = []
6775
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6776

    
6777
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6778
                                       cleanup=self.op.cleanup,
6779
                                       failover=False,
6780
                                       fallback=self.op.allow_failover)
6781
    self.tasklets = [self._migrater]
6782

    
6783
  def DeclareLocks(self, level):
6784
    if level == locking.LEVEL_NODE:
6785
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6786
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6787
        if self.op.target_node is None:
6788
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6789
        else:
6790
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6791
                                                   self.op.target_node]
6792
        del self.recalculate_locks[locking.LEVEL_NODE]
6793
      else:
6794
        self._LockInstancesNodes()
6795

    
6796
  def BuildHooksEnv(self):
6797
    """Build hooks env.
6798

6799
    This runs on master, primary and secondary nodes of the instance.
6800

6801
    """
6802
    instance = self._migrater.instance
6803
    source_node = instance.primary_node
6804
    target_node = self.op.target_node
6805
    env = _BuildInstanceHookEnvByObject(self, instance)
6806
    env.update({
6807
      "MIGRATE_LIVE": self._migrater.live,
6808
      "MIGRATE_CLEANUP": self.op.cleanup,
6809
      "OLD_PRIMARY": source_node,
6810
      "NEW_PRIMARY": target_node,
6811
      })
6812

    
6813
    if instance.disk_template in constants.DTS_INT_MIRROR:
6814
      env["OLD_SECONDARY"] = target_node
6815
      env["NEW_SECONDARY"] = source_node
6816
    else:
6817
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6818

    
6819
    return env
6820

    
6821
  def BuildHooksNodes(self):
6822
    """Build hooks nodes.
6823

6824
    """
6825
    instance = self._migrater.instance
6826
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6827
    return (nl, nl + [instance.primary_node])
6828

    
6829

    
6830
class LUInstanceMove(LogicalUnit):
6831
  """Move an instance by data-copying.
6832

6833
  """
6834
  HPATH = "instance-move"
6835
  HTYPE = constants.HTYPE_INSTANCE
6836
  REQ_BGL = False
6837

    
6838
  def ExpandNames(self):
6839
    self._ExpandAndLockInstance()
6840
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6841
    self.op.target_node = target_node
6842
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6843
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6844

    
6845
  def DeclareLocks(self, level):
6846
    if level == locking.LEVEL_NODE:
6847
      self._LockInstancesNodes(primary_only=True)
6848

    
6849
  def BuildHooksEnv(self):
6850
    """Build hooks env.
6851

6852
    This runs on master, primary and secondary nodes of the instance.
6853

6854
    """
6855
    env = {
6856
      "TARGET_NODE": self.op.target_node,
6857
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6858
      }
6859
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6860
    return env
6861

    
6862
  def BuildHooksNodes(self):
6863
    """Build hooks nodes.
6864

6865
    """
6866
    nl = [
6867
      self.cfg.GetMasterNode(),
6868
      self.instance.primary_node,
6869
      self.op.target_node,
6870
      ]
6871
    return (nl, nl)
6872

    
6873
  def CheckPrereq(self):
6874
    """Check prerequisites.
6875

6876
    This checks that the instance is in the cluster.
6877

6878
    """
6879
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6880
    assert self.instance is not None, \
6881
      "Cannot retrieve locked instance %s" % self.op.instance_name
6882

    
6883
    node = self.cfg.GetNodeInfo(self.op.target_node)
6884
    assert node is not None, \
6885
      "Cannot retrieve locked node %s" % self.op.target_node
6886

    
6887
    self.target_node = target_node = node.name
6888

    
6889
    if target_node == instance.primary_node:
6890
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6891
                                 (instance.name, target_node),
6892
                                 errors.ECODE_STATE)
6893

    
6894
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6895

    
6896
    for idx, dsk in enumerate(instance.disks):
6897
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6898
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6899
                                   " cannot copy" % idx, errors.ECODE_STATE)
6900

    
6901
    _CheckNodeOnline(self, target_node)
6902
    _CheckNodeNotDrained(self, target_node)
6903
    _CheckNodeVmCapable(self, target_node)
6904

    
6905
    if instance.admin_up:
6906
      # check memory requirements on the target node
6907
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6908
                           instance.name, bep[constants.BE_MEMORY],
6909
                           instance.hypervisor)
6910
    else:
6911
      self.LogInfo("Not checking memory on the secondary node as"
6912
                   " instance will not be started")
6913

    
6914
    # check bridge existence
6915
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6916

    
6917
  def Exec(self, feedback_fn):
6918
    """Move an instance.
6919

6920
    The move is done by shutting it down on its present node, copying
6921
    the data over (slow) and starting it on the new node.
6922

6923
    """
6924
    instance = self.instance
6925

    
6926
    source_node = instance.primary_node
6927
    target_node = self.target_node
6928

    
6929
    self.LogInfo("Shutting down instance %s on source node %s",
6930
                 instance.name, source_node)
6931

    
6932
    result = self.rpc.call_instance_shutdown(source_node, instance,
6933
                                             self.op.shutdown_timeout)
6934
    msg = result.fail_msg
6935
    if msg:
6936
      if self.op.ignore_consistency:
6937
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6938
                             " Proceeding anyway. Please make sure node"
6939
                             " %s is down. Error details: %s",
6940
                             instance.name, source_node, source_node, msg)
6941
      else:
6942
        raise errors.OpExecError("Could not shutdown instance %s on"
6943
                                 " node %s: %s" %
6944
                                 (instance.name, source_node, msg))
6945

    
6946
    # create the target disks
6947
    try:
6948
      _CreateDisks(self, instance, target_node=target_node)
6949
    except errors.OpExecError:
6950
      self.LogWarning("Device creation failed, reverting...")
6951
      try:
6952
        _RemoveDisks(self, instance, target_node=target_node)
6953
      finally:
6954
        self.cfg.ReleaseDRBDMinors(instance.name)
6955
        raise
6956

    
6957
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6958

    
6959
    errs = []
6960
    # activate, get path, copy the data over
6961
    for idx, disk in enumerate(instance.disks):
6962
      self.LogInfo("Copying data for disk %d", idx)
6963
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6964
                                               instance.name, True, idx)
6965
      if result.fail_msg:
6966
        self.LogWarning("Can't assemble newly created disk %d: %s",
6967
                        idx, result.fail_msg)
6968
        errs.append(result.fail_msg)
6969
        break
6970
      dev_path = result.payload
6971
      result = self.rpc.call_blockdev_export(source_node, disk,
6972
                                             target_node, dev_path,
6973
                                             cluster_name)
6974
      if result.fail_msg:
6975
        self.LogWarning("Can't copy data over for disk %d: %s",
6976
                        idx, result.fail_msg)
6977
        errs.append(result.fail_msg)
6978
        break
6979

    
6980
    if errs:
6981
      self.LogWarning("Some disks failed to copy, aborting")
6982
      try:
6983
        _RemoveDisks(self, instance, target_node=target_node)
6984
      finally:
6985
        self.cfg.ReleaseDRBDMinors(instance.name)
6986
        raise errors.OpExecError("Errors during disk copy: %s" %
6987
                                 (",".join(errs),))
6988

    
6989
    instance.primary_node = target_node
6990
    self.cfg.Update(instance, feedback_fn)
6991

    
6992
    self.LogInfo("Removing the disks on the original node")
6993
    _RemoveDisks(self, instance, target_node=source_node)
6994

    
6995
    # Only start the instance if it's marked as up
6996
    if instance.admin_up:
6997
      self.LogInfo("Starting instance %s on node %s",
6998
                   instance.name, target_node)
6999

    
7000
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7001
                                           ignore_secondaries=True)
7002
      if not disks_ok:
7003
        _ShutdownInstanceDisks(self, instance)
7004
        raise errors.OpExecError("Can't activate the instance's disks")
7005

    
7006
      result = self.rpc.call_instance_start(target_node, instance,
7007
                                            None, None, False)
7008
      msg = result.fail_msg
7009
      if msg:
7010
        _ShutdownInstanceDisks(self, instance)
7011
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7012
                                 (instance.name, target_node, msg))
7013

    
7014

    
7015
class LUNodeMigrate(LogicalUnit):
7016
  """Migrate all instances from a node.
7017

7018
  """
7019
  HPATH = "node-migrate"
7020
  HTYPE = constants.HTYPE_NODE
7021
  REQ_BGL = False
7022

    
7023
  def CheckArguments(self):
7024
    pass
7025

    
7026
  def ExpandNames(self):
7027
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7028

    
7029
    self.share_locks = _ShareAll()
7030
    self.needed_locks = {
7031
      locking.LEVEL_NODE: [self.op.node_name],
7032
      }
7033

    
7034
  def BuildHooksEnv(self):
7035
    """Build hooks env.
7036

7037
    This runs on the master, the primary and all the secondaries.
7038

7039
    """
7040
    return {
7041
      "NODE_NAME": self.op.node_name,
7042
      }
7043

    
7044
  def BuildHooksNodes(self):
7045
    """Build hooks nodes.
7046

7047
    """
7048
    nl = [self.cfg.GetMasterNode()]
7049
    return (nl, nl)
7050

    
7051
  def CheckPrereq(self):
7052
    pass
7053

    
7054
  def Exec(self, feedback_fn):
7055
    # Prepare jobs for migration instances
7056
    jobs = [
7057
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7058
                                 mode=self.op.mode,
7059
                                 live=self.op.live,
7060
                                 iallocator=self.op.iallocator,
7061
                                 target_node=self.op.target_node)]
7062
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7063
      ]
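    # Note (descriptive comment): each sub-list above becomes one separate job
    # holding a single OpInstanceMigrate opcode, i.e. one migration job per
    # primary instance on the node being migrated; their job IDs are handed
    # back to the caller through the ResultWithJobs object returned below.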
7064

    
7065
    # TODO: Run iallocator in this opcode and pass correct placement options to
7066
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7067
    # running the iallocator and the actual migration, a good consistency model
7068
    # will have to be found.
7069

    
7070
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7071
            frozenset([self.op.node_name]))
7072

    
7073
    return ResultWithJobs(jobs)
7074

    
7075

    
7076
class TLMigrateInstance(Tasklet):
7077
  """Tasklet class for instance migration.
7078

7079
  @type live: boolean
7080
  @ivar live: whether the migration will be done live or non-live;
7081
      this variable is initialized only after CheckPrereq has run
7082
  @type cleanup: boolean
7083
  @ivar cleanup: Whether we clean up from a failed migration
7084
  @type iallocator: string
7085
  @ivar iallocator: The iallocator used to determine target_node
7086
  @type target_node: string
7087
  @ivar target_node: If given, the target_node to reallocate the instance to
7088
  @type failover: boolean
7089
  @ivar failover: Whether operation results in failover or migration
7090
  @type fallback: boolean
7091
  @ivar fallback: Whether fallback to failover is allowed if migration not
7092
                  possible
7093
  @type ignore_consistency: boolean
7094
  @ivar ignore_consistency: Whether we should ignore consistency between source
7095
                            and target node
7096
  @type shutdown_timeout: int
7097
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7098

7099
  """
7100
  def __init__(self, lu, instance_name, cleanup=False,
7101
               failover=False, fallback=False,
7102
               ignore_consistency=False,
7103
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7104
    """Initializes this class.
7105

7106
    """
7107
    Tasklet.__init__(self, lu)
7108

    
7109
    # Parameters
7110
    self.instance_name = instance_name
7111
    self.cleanup = cleanup
7112
    self.live = False # will be overridden later
7113
    self.failover = failover
7114
    self.fallback = fallback
7115
    self.ignore_consistency = ignore_consistency
7116
    self.shutdown_timeout = shutdown_timeout
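    # Hedged usage sketch (not executed here, details depend on the calling
    # LU): the owning LU is expected to wrap this tasklet roughly like
    #   self.tasklets = [TLMigrateInstance(self, self.op.instance_name,
    #                                      cleanup=self.op.cleanup)]
    # so that CheckPrereq/Exec below are driven by the generic tasklet loop.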
7117

    
7118
  def CheckPrereq(self):
7119
    """Check prerequisites.
7120

7121
    This checks that the instance is in the cluster.
7122

7123
    """
7124
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7125
    instance = self.cfg.GetInstanceInfo(instance_name)
7126
    assert instance is not None
7127
    self.instance = instance
7128

    
7129
    if (not self.cleanup and not instance.admin_up and not self.failover and
7130
        self.fallback):
7131
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7132
                      " to failover")
7133
      self.failover = True
7134

    
7135
    if instance.disk_template not in constants.DTS_MIRRORED:
7136
      if self.failover:
7137
        text = "failovers"
7138
      else:
7139
        text = "migrations"
7140
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7141
                                 " %s" % (instance.disk_template, text),
7142
                                 errors.ECODE_STATE)
7143

    
7144
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7145
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7146

    
7147
      if self.lu.op.iallocator:
7148
        self._RunAllocator()
7149
      else:
7150
        # We set self.target_node as it is required by
7151
        # BuildHooksEnv
7152
        self.target_node = self.lu.op.target_node
7153

    
7154
      # self.target_node is already populated, either directly or by the
7155
      # iallocator run
7156
      target_node = self.target_node
7157
      if self.target_node == instance.primary_node:
7158
        raise errors.OpPrereqError("Cannot migrate instance %s"
7159
                                   " to its primary (%s)" %
7160
                                   (instance.name, instance.primary_node))
7161

    
7162
      if len(self.lu.tasklets) == 1:
7163
        # It is safe to release locks only when we're the only tasklet
7164
        # in the LU
7165
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7166
                      keep=[instance.primary_node, self.target_node])
7167

    
7168
    else:
7169
      secondary_nodes = instance.secondary_nodes
7170
      if not secondary_nodes:
7171
        raise errors.ConfigurationError("No secondary node but using"
7172
                                        " %s disk template" %
7173
                                        instance.disk_template)
7174
      target_node = secondary_nodes[0]
7175
      if self.lu.op.iallocator or (self.lu.op.target_node and
7176
                                   self.lu.op.target_node != target_node):
7177
        if self.failover:
7178
          text = "failed over"
7179
        else:
7180
          text = "migrated"
7181
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7182
                                   " be %s to arbitrary nodes"
7183
                                   " (neither an iallocator nor a target"
7184
                                   " node can be passed)" %
7185
                                   (instance.disk_template, text),
7186
                                   errors.ECODE_INVAL)
7187

    
7188
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7189

    
7190
    # check memory requirements on the secondary node
7191
    if not self.cleanup and (not self.failover or instance.admin_up):
7192
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7193
                           instance.name, i_be[constants.BE_MEMORY],
7194
                           instance.hypervisor)
7195
    else:
7196
      self.lu.LogInfo("Not checking memory on the secondary node as"
7197
                      " instance will not be started")
7198

    
7199
    # check bridge existence
7200
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7201

    
7202
    if not self.cleanup:
7203
      _CheckNodeNotDrained(self.lu, target_node)
7204
      if not self.failover:
7205
        result = self.rpc.call_instance_migratable(instance.primary_node,
7206
                                                   instance)
7207
        if result.fail_msg and self.fallback:
7208
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7209
                          " failover")
7210
          self.failover = True
7211
        else:
7212
          result.Raise("Can't migrate, please use failover",
7213
                       prereq=True, ecode=errors.ECODE_STATE)
7214

    
7215
    assert not (self.failover and self.cleanup)
7216

    
7217
    if not self.failover:
7218
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7219
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7220
                                   " parameters are accepted",
7221
                                   errors.ECODE_INVAL)
7222
      if self.lu.op.live is not None:
7223
        if self.lu.op.live:
7224
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7225
        else:
7226
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7227
        # reset the 'live' parameter to None so that repeated
7228
        # invocations of CheckPrereq do not raise an exception
7229
        self.lu.op.live = None
7230
      elif self.lu.op.mode is None:
7231
        # read the default value from the hypervisor
7232
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7233
                                                skip_globals=False)
7234
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7235

    
7236
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7237
    else:
7238
      # Failover is never live
7239
      self.live = False
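      # To summarize the block above: op.live=True maps to
      # constants.HT_MIGRATION_LIVE, op.live=False to HT_MIGRATION_NONLIVE,
      # and when neither 'live' nor 'mode' is given the hypervisor's
      # HV_MIGRATION_MODE default decides; failovers always end up non-live.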
7240

    
7241
  def _RunAllocator(self):
7242
    """Run the allocator based on input opcode.
7243

7244
    """
7245
    ial = IAllocator(self.cfg, self.rpc,
7246
                     mode=constants.IALLOCATOR_MODE_RELOC,
7247
                     name=self.instance_name,
7248
                     # TODO See why hail breaks with a single node below
7249
                     relocate_from=[self.instance.primary_node,
7250
                                    self.instance.primary_node],
7251
                     )
7252

    
7253
    ial.Run(self.lu.op.iallocator)
7254

    
7255
    if not ial.success:
7256
      raise errors.OpPrereqError("Can't compute nodes using"
7257
                                 " iallocator '%s': %s" %
7258
                                 (self.lu.op.iallocator, ial.info),
7259
                                 errors.ECODE_NORES)
7260
    if len(ial.result) != ial.required_nodes:
7261
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7262
                                 " of nodes (%s), required %s" %
7263
                                 (self.lu.op.iallocator, len(ial.result),
7264
                                  ial.required_nodes), errors.ECODE_FAULT)
7265
    self.target_node = ial.result[0]
7266
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7267
                 self.instance_name, self.lu.op.iallocator,
7268
                 utils.CommaJoin(ial.result))
7269

    
7270
  def _WaitUntilSync(self):
7271
    """Poll with custom rpc for disk sync.
7272

7273
    This uses our own step-based rpc call.
7274

7275
    """
7276
    self.feedback_fn("* wait until resync is done")
7277
    all_done = False
7278
    while not all_done:
7279
      all_done = True
7280
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7281
                                            self.nodes_ip,
7282
                                            self.instance.disks)
7283
      min_percent = 100
7284
      for node, nres in result.items():
7285
        nres.Raise("Cannot resync disks on node %s" % node)
7286
        node_done, node_percent = nres.payload
7287
        all_done = all_done and node_done
7288
        if node_percent is not None:
7289
          min_percent = min(min_percent, node_percent)
7290
      if not all_done:
7291
        if min_percent < 100:
7292
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7293
        time.sleep(2)
7294

    
7295
  def _EnsureSecondary(self, node):
7296
    """Demote a node to secondary.
7297

7298
    """
7299
    self.feedback_fn("* switching node %s to secondary mode" % node)
7300

    
7301
    for dev in self.instance.disks:
7302
      self.cfg.SetDiskID(dev, node)
7303

    
7304
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7305
                                          self.instance.disks)
7306
    result.Raise("Cannot change disk to secondary on node %s" % node)
7307

    
7308
  def _GoStandalone(self):
7309
    """Disconnect from the network.
7310

7311
    """
7312
    self.feedback_fn("* changing into standalone mode")
7313
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7314
                                               self.instance.disks)
7315
    for node, nres in result.items():
7316
      nres.Raise("Cannot disconnect disks node %s" % node)
7317

    
7318
  def _GoReconnect(self, multimaster):
7319
    """Reconnect to the network.
7320

7321
    """
7322
    if multimaster:
7323
      msg = "dual-master"
7324
    else:
7325
      msg = "single-master"
7326
    self.feedback_fn("* changing disks into %s mode" % msg)
7327
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7328
                                           self.instance.disks,
7329
                                           self.instance.name, multimaster)
7330
    for node, nres in result.items():
7331
      nres.Raise("Cannot change disks config on node %s" % node)
7332

    
7333
  def _ExecCleanup(self):
7334
    """Try to cleanup after a failed migration.
7335

7336
    The cleanup is done by:
7337
      - check that the instance is running only on one node
7338
        (and update the config if needed)
7339
      - change disks on its secondary node to secondary
7340
      - wait until disks are fully synchronized
7341
      - disconnect from the network
7342
      - change disks into single-master mode
7343
      - wait again until disks are fully synchronized
7344

7345
    """
7346
    instance = self.instance
7347
    target_node = self.target_node
7348
    source_node = self.source_node
7349

    
7350
    # check running on only one node
7351
    self.feedback_fn("* checking where the instance actually runs"
7352
                     " (if this hangs, the hypervisor might be in"
7353
                     " a bad state)")
7354
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7355
    for node, result in ins_l.items():
7356
      result.Raise("Can't contact node %s" % node)
7357

    
7358
    runningon_source = instance.name in ins_l[source_node].payload
7359
    runningon_target = instance.name in ins_l[target_node].payload
7360

    
7361
    if runningon_source and runningon_target:
7362
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7363
                               " or the hypervisor is confused; you will have"
7364
                               " to ensure manually that it runs only on one"
7365
                               " and restart this operation")
7366

    
7367
    if not (runningon_source or runningon_target):
7368
      raise errors.OpExecError("Instance does not seem to be running at all;"
7369
                               " in this case it's safer to repair by"
7370
                               " running 'gnt-instance stop' to ensure disk"
7371
                               " shutdown, and then restarting it")
7372

    
7373
    if runningon_target:
7374
      # the migration has actually succeeded, we need to update the config
7375
      self.feedback_fn("* instance running on secondary node (%s),"
7376
                       " updating config" % target_node)
7377
      instance.primary_node = target_node
7378
      self.cfg.Update(instance, self.feedback_fn)
7379
      demoted_node = source_node
7380
    else:
7381
      self.feedback_fn("* instance confirmed to be running on its"
7382
                       " primary node (%s)" % source_node)
7383
      demoted_node = target_node
7384

    
7385
    if instance.disk_template in constants.DTS_INT_MIRROR:
7386
      self._EnsureSecondary(demoted_node)
7387
      try:
7388
        self._WaitUntilSync()
7389
      except errors.OpExecError:
7390
        # we ignore here errors, since if the device is standalone, it
7391
        # won't be able to sync
7392
        pass
7393
      self._GoStandalone()
7394
      self._GoReconnect(False)
7395
      self._WaitUntilSync()
7396

    
7397
    self.feedback_fn("* done")
7398

    
7399
  def _RevertDiskStatus(self):
7400
    """Try to revert the disk status after a failed migration.
7401

7402
    """
7403
    target_node = self.target_node
7404
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7405
      return
7406

    
7407
    try:
7408
      self._EnsureSecondary(target_node)
7409
      self._GoStandalone()
7410
      self._GoReconnect(False)
7411
      self._WaitUntilSync()
7412
    except errors.OpExecError, err:
7413
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7414
                         " please try to recover the instance manually;"
7415
                         " error '%s'" % str(err))
7416

    
7417
  def _AbortMigration(self):
7418
    """Call the hypervisor code to abort a started migration.
7419

7420
    """
7421
    instance = self.instance
7422
    target_node = self.target_node
7423
    migration_info = self.migration_info
7424

    
7425
    abort_result = self.rpc.call_finalize_migration(target_node,
7426
                                                    instance,
7427
                                                    migration_info,
7428
                                                    False)
7429
    abort_msg = abort_result.fail_msg
7430
    if abort_msg:
7431
      logging.error("Aborting migration failed on target node %s: %s",
7432
                    target_node, abort_msg)
7433
      # Don't raise an exception here, as we still have to try to revert the
7434
      # disk status, even if this step failed.
7435

    
7436
  def _ExecMigration(self):
7437
    """Migrate an instance.
7438

7439
    The migrate is done by:
7440
      - change the disks into dual-master mode
7441
      - wait until disks are fully synchronized again
7442
      - migrate the instance
7443
      - change disks on the new secondary node (the old primary) to secondary
7444
      - wait until disks are fully synchronized
7445
      - change disks into single-master mode
7446

7447
    """
7448
    instance = self.instance
7449
    target_node = self.target_node
7450
    source_node = self.source_node
7451

    
7452
    # Check for hypervisor version mismatch and warn the user.
7453
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7454
                                       None, self.instance.hypervisor)
7455
    src_info = nodeinfo[source_node]
7456
    dst_info = nodeinfo[target_node]
7457

    
7458
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7459
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7460
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7461
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7462
      if src_version != dst_version:
7463
        self.feedback_fn("* warning: hypervisor version mismatch between"
7464
                         " source (%s) and target (%s) node" %
7465
                         (src_version, dst_version))
7466

    
7467
    self.feedback_fn("* checking disk consistency between source and target")
7468
    for dev in instance.disks:
7469
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7470
        raise errors.OpExecError("Disk %s is degraded or not fully"
7471
                                 " synchronized on target node,"
7472
                                 " aborting migration" % dev.iv_name)
7473

    
7474
    # First get the migration information from the remote node
7475
    result = self.rpc.call_migration_info(source_node, instance)
7476
    msg = result.fail_msg
7477
    if msg:
7478
      log_err = ("Failed fetching source migration information from %s: %s" %
7479
                 (source_node, msg))
7480
      logging.error(log_err)
7481
      raise errors.OpExecError(log_err)
7482

    
7483
    self.migration_info = migration_info = result.payload
7484

    
7485
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7486
      # Then switch the disks to master/master mode
7487
      self._EnsureSecondary(target_node)
7488
      self._GoStandalone()
7489
      self._GoReconnect(True)
7490
      self._WaitUntilSync()
7491

    
7492
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7493
    result = self.rpc.call_accept_instance(target_node,
7494
                                           instance,
7495
                                           migration_info,
7496
                                           self.nodes_ip[target_node])
7497

    
7498
    msg = result.fail_msg
7499
    if msg:
7500
      logging.error("Instance pre-migration failed, trying to revert"
7501
                    " disk status: %s", msg)
7502
      self.feedback_fn("Pre-migration failed, aborting")
7503
      self._AbortMigration()
7504
      self._RevertDiskStatus()
7505
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7506
                               (instance.name, msg))
7507

    
7508
    self.feedback_fn("* migrating instance to %s" % target_node)
7509
    result = self.rpc.call_instance_migrate(source_node, instance,
7510
                                            self.nodes_ip[target_node],
7511
                                            self.live)
7512
    msg = result.fail_msg
7513
    if msg:
7514
      logging.error("Instance migration failed, trying to revert"
7515
                    " disk status: %s", msg)
7516
      self.feedback_fn("Migration failed, aborting")
7517
      self._AbortMigration()
7518
      self._RevertDiskStatus()
7519
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7520
                               (instance.name, msg))
7521

    
7522
    instance.primary_node = target_node
7523
    # distribute new instance config to the other nodes
7524
    self.cfg.Update(instance, self.feedback_fn)
7525

    
7526
    result = self.rpc.call_finalize_migration(target_node,
7527
                                              instance,
7528
                                              migration_info,
7529
                                              True)
7530
    msg = result.fail_msg
7531
    if msg:
7532
      logging.error("Instance migration succeeded, but finalization failed:"
7533
                    " %s", msg)
7534
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7535
                               msg)
7536

    
7537
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7538
      self._EnsureSecondary(source_node)
7539
      self._WaitUntilSync()
7540
      self._GoStandalone()
7541
      self._GoReconnect(False)
7542
      self._WaitUntilSync()
7543

    
7544
    self.feedback_fn("* done")
7545

    
7546
  def _ExecFailover(self):
7547
    """Failover an instance.
7548

7549
    The failover is done by shutting it down on its present node and
7550
    starting it on the secondary.
7551

7552
    """
7553
    instance = self.instance
7554
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7555

    
7556
    source_node = instance.primary_node
7557
    target_node = self.target_node
7558

    
7559
    if instance.admin_up:
7560
      self.feedback_fn("* checking disk consistency between source and target")
7561
      for dev in instance.disks:
7562
        # for drbd, these are drbd over lvm
7563
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7564
          if primary_node.offline:
7565
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7566
                             " target node %s" %
7567
                             (primary_node.name, dev.iv_name, target_node))
7568
          elif not self.ignore_consistency:
7569
            raise errors.OpExecError("Disk %s is degraded on target node,"
7570
                                     " aborting failover" % dev.iv_name)
7571
    else:
7572
      self.feedback_fn("* not checking disk consistency as instance is not"
7573
                       " running")
7574

    
7575
    self.feedback_fn("* shutting down instance on source node")
7576
    logging.info("Shutting down instance %s on node %s",
7577
                 instance.name, source_node)
7578

    
7579
    result = self.rpc.call_instance_shutdown(source_node, instance,
7580
                                             self.shutdown_timeout)
7581
    msg = result.fail_msg
7582
    if msg:
7583
      if self.ignore_consistency or primary_node.offline:
7584
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7585
                           " proceeding anyway; please make sure node"
7586
                           " %s is down; error details: %s",
7587
                           instance.name, source_node, source_node, msg)
7588
      else:
7589
        raise errors.OpExecError("Could not shutdown instance %s on"
7590
                                 " node %s: %s" %
7591
                                 (instance.name, source_node, msg))
7592

    
7593
    self.feedback_fn("* deactivating the instance's disks on source node")
7594
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7595
      raise errors.OpExecError("Can't shut down the instance's disks")
7596

    
7597
    instance.primary_node = target_node
7598
    # distribute new instance config to the other nodes
7599
    self.cfg.Update(instance, self.feedback_fn)
7600

    
7601
    # Only start the instance if it's marked as up
7602
    if instance.admin_up:
7603
      self.feedback_fn("* activating the instance's disks on target node %s" %
7604
                       target_node)
7605
      logging.info("Starting instance %s on node %s",
7606
                   instance.name, target_node)
7607

    
7608
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7609
                                           ignore_secondaries=True)
7610
      if not disks_ok:
7611
        _ShutdownInstanceDisks(self.lu, instance)
7612
        raise errors.OpExecError("Can't activate the instance's disks")
7613

    
7614
      self.feedback_fn("* starting the instance on the target node %s" %
7615
                       target_node)
7616
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7617
                                            False)
7618
      msg = result.fail_msg
7619
      if msg:
7620
        _ShutdownInstanceDisks(self.lu, instance)
7621
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7622
                                 (instance.name, target_node, msg))
7623

    
7624
  def Exec(self, feedback_fn):
7625
    """Perform the migration.
7626

7627
    """
7628
    self.feedback_fn = feedback_fn
7629
    self.source_node = self.instance.primary_node
7630

    
7631
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7632
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7633
      self.target_node = self.instance.secondary_nodes[0]
7634
      # Otherwise self.target_node has been populated either
7635
      # directly, or through an iallocator.
7636

    
7637
    self.all_nodes = [self.source_node, self.target_node]
7638
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7639
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7640

    
7641
    if self.failover:
7642
      feedback_fn("Failover instance %s" % self.instance.name)
7643
      self._ExecFailover()
7644
    else:
7645
      feedback_fn("Migrating instance %s" % self.instance.name)
7646

    
7647
      if self.cleanup:
7648
        return self._ExecCleanup()
7649
      else:
7650
        return self._ExecMigration()
7651

    
7652

    
7653
def _CreateBlockDev(lu, node, instance, device, force_create,
7654
                    info, force_open):
7655
  """Create a tree of block devices on a given node.
7656

7657
  If this device type has to be created on secondaries, create it and
7658
  all its children.
7659

7660
  If not, just recurse to children keeping the same 'force' value.
7661

7662
  @param lu: the lu on whose behalf we execute
7663
  @param node: the node on which to create the device
7664
  @type instance: L{objects.Instance}
7665
  @param instance: the instance which owns the device
7666
  @type device: L{objects.Disk}
7667
  @param device: the device to create
7668
  @type force_create: boolean
7669
  @param force_create: whether to force creation of this device; this
7670
      will be changed to True whenever we find a device which has
7671
      CreateOnSecondary() attribute
7672
  @param info: the extra 'metadata' we should attach to the device
7673
      (this will be represented as a LVM tag)
7674
  @type force_open: boolean
7675
  @param force_open: this parameter will be passed to the
7676
      L{backend.BlockdevCreate} function where it specifies
7677
      whether we run on primary or not, and it affects both
7678
      the child assembly and the device's own Open() execution
7679

7680
  """
7681
  if device.CreateOnSecondary():
7682
    force_create = True
7683

    
7684
  if device.children:
7685
    for child in device.children:
7686
      _CreateBlockDev(lu, node, instance, child, force_create,
7687
                      info, force_open)
7688

    
7689
  if not force_create:
7690
    return
7691

    
7692
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
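
# Illustrative sketch (assumption-laden, not executed code): for a DRBD8 disk
# the recursion in _CreateBlockDev first creates the two LV children (data and
# metadata volumes) and only afterwards the DRBD device itself, conceptually:
#
#   for child in drbd_dev.children:   # dev_data, dev_meta from the generator
#     _CreateBlockDev(lu, node, instance, child, force_create, info, force_open)
#   _CreateSingleBlockDev(lu, node, instance, drbd_dev, info, force_open)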
7693

    
7694

    
7695
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7696
  """Create a single block device on a given node.
7697

7698
  This will not recurse over children of the device, so they must be
7699
  created in advance.
7700

7701
  @param lu: the lu on whose behalf we execute
7702
  @param node: the node on which to create the device
7703
  @type instance: L{objects.Instance}
7704
  @param instance: the instance which owns the device
7705
  @type device: L{objects.Disk}
7706
  @param device: the device to create
7707
  @param info: the extra 'metadata' we should attach to the device
7708
      (this will be represented as a LVM tag)
7709
  @type force_open: boolean
7710
  @param force_open: this parameter will be passes to the
7711
      L{backend.BlockdevCreate} function where it specifies
7712
      whether we run on primary or not, and it affects both
7713
      the child assembly and the device own Open() execution
7714

7715
  """
7716
  lu.cfg.SetDiskID(device, node)
7717
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7718
                                       instance.name, force_open, info)
7719
  result.Raise("Can't create block device %s on"
7720
               " node %s for instance %s" % (device, node, instance.name))
7721
  if device.physical_id is None:
7722
    device.physical_id = result.payload
7723

    
7724

    
7725
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
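
# Hedged usage example: callers pass name extensions and get back unique LV
# names; the unique part comes from the cluster configuration, so the values
# below are purely illustrative:
#
#   names = _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   # e.g. ["<uuid>.disk0", "<uuid>.disk1"]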
7736

    
7737

    
7738
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
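
# Descriptive note: the returned object is a single LD_DRBD8 device of the
# requested size whose children are two LD_LV volumes (a data LV of the full
# size and a fixed 128 MB metadata LV), with the DRBD logical_id carrying
# (primary, secondary, port, p_minor, s_minor, shared_secret).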
7757

    
7758

    
7759
def _GenerateDiskTemplate(lu, template_name,
7760
                          instance_name, primary_node,
7761
                          secondary_nodes, disk_info,
7762
                          file_storage_dir, file_driver,
7763
                          base_index, feedback_fn):
7764
  """Generate the entire disk layout for a given template type.
7765

7766
  """
7767
  #TODO: compute space requirements
7768

    
7769
  vgname = lu.cfg.GetVGName()
7770
  disk_count = len(disk_info)
7771
  disks = []
7772
  if template_name == constants.DT_DISKLESS:
7773
    pass
7774
  elif template_name == constants.DT_PLAIN:
7775
    if len(secondary_nodes) != 0:
7776
      raise errors.ProgrammerError("Wrong template configuration")
7777

    
7778
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7779
                                      for i in range(disk_count)])
7780
    for idx, disk in enumerate(disk_info):
7781
      disk_index = idx + base_index
7782
      vg = disk.get(constants.IDISK_VG, vgname)
7783
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7784
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7785
                              size=disk[constants.IDISK_SIZE],
7786
                              logical_id=(vg, names[idx]),
7787
                              iv_name="disk/%d" % disk_index,
7788
                              mode=disk[constants.IDISK_MODE])
7789
      disks.append(disk_dev)
7790
  elif template_name == constants.DT_DRBD8:
7791
    if len(secondary_nodes) != 1:
7792
      raise errors.ProgrammerError("Wrong template configuration")
7793
    remote_node = secondary_nodes[0]
7794
    minors = lu.cfg.AllocateDRBDMinor(
7795
      [primary_node, remote_node] * len(disk_info), instance_name)
7796

    
7797
    names = []
7798
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7799
                                               for i in range(disk_count)]):
7800
      names.append(lv_prefix + "_data")
7801
      names.append(lv_prefix + "_meta")
7802
    for idx, disk in enumerate(disk_info):
7803
      disk_index = idx + base_index
7804
      data_vg = disk.get(constants.IDISK_VG, vgname)
7805
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7806
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7807
                                      disk[constants.IDISK_SIZE],
7808
                                      [data_vg, meta_vg],
7809
                                      names[idx * 2:idx * 2 + 2],
7810
                                      "disk/%d" % disk_index,
7811
                                      minors[idx * 2], minors[idx * 2 + 1])
7812
      disk_dev.mode = disk[constants.IDISK_MODE]
7813
      disks.append(disk_dev)
7814
  elif template_name == constants.DT_FILE:
7815
    if len(secondary_nodes) != 0:
7816
      raise errors.ProgrammerError("Wrong template configuration")
7817

    
7818
    opcodes.RequireFileStorage()
7819

    
7820
    for idx, disk in enumerate(disk_info):
7821
      disk_index = idx + base_index
7822
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7823
                              size=disk[constants.IDISK_SIZE],
7824
                              iv_name="disk/%d" % disk_index,
7825
                              logical_id=(file_driver,
7826
                                          "%s/disk%d" % (file_storage_dir,
7827
                                                         disk_index)),
7828
                              mode=disk[constants.IDISK_MODE])
7829
      disks.append(disk_dev)
7830
  elif template_name == constants.DT_SHARED_FILE:
7831
    if len(secondary_nodes) != 0:
7832
      raise errors.ProgrammerError("Wrong template configuration")
7833

    
7834
    opcodes.RequireSharedFileStorage()
7835

    
7836
    for idx, disk in enumerate(disk_info):
7837
      disk_index = idx + base_index
7838
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7839
                              size=disk[constants.IDISK_SIZE],
7840
                              iv_name="disk/%d" % disk_index,
7841
                              logical_id=(file_driver,
7842
                                          "%s/disk%d" % (file_storage_dir,
7843
                                                         disk_index)),
7844
                              mode=disk[constants.IDISK_MODE])
7845
      disks.append(disk_dev)
7846
  elif template_name == constants.DT_BLOCK:
7847
    if len(secondary_nodes) != 0:
7848
      raise errors.ProgrammerError("Wrong template configuration")
7849

    
7850
    for idx, disk in enumerate(disk_info):
7851
      disk_index = idx + base_index
7852
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7853
                              size=disk[constants.IDISK_SIZE],
7854
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7855
                                          disk[constants.IDISK_ADOPT]),
7856
                              iv_name="disk/%d" % disk_index,
7857
                              mode=disk[constants.IDISK_MODE])
7858
      disks.append(disk_dev)
7859

    
7860
  else:
7861
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7862
  return disks
7863

    
7864

    
7865
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7870

    
7871

    
7872
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
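
# Worked example with illustrative numbers: if 512 MiB of a 4096 MiB disk were
# written in 30 seconds, the average is 30 / 512 seconds per MiB and the
# estimate for the remaining 3584 MiB is (4096 - 512) * 30 / 512 = 210 seconds.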
7883

    
7884

    
7885
def _WipeDisks(lu, instance):
7886
  """Wipes instance disks.
7887

7888
  @type lu: L{LogicalUnit}
7889
  @param lu: the logical unit on whose behalf we execute
7890
  @type instance: L{objects.Instance}
7891
  @param instance: the instance whose disks we should create
7892
  @return: the success of the wipe
7893

7894
  """
7895
  node = instance.primary_node
7896

    
7897
  for device in instance.disks:
7898
    lu.cfg.SetDiskID(device, node)
7899

    
7900
  logging.info("Pause sync of instance %s disks", instance.name)
7901
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7902

    
7903
  for idx, success in enumerate(result.payload):
7904
    if not success:
7905
      logging.warn("pause-sync of instance %s for disks %d failed",
7906
                   instance.name, idx)
7907

    
7908
  try:
7909
    for idx, device in enumerate(instance.disks):
7910
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7911
      # MAX_WIPE_CHUNK at max
7912
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7913
                            constants.MIN_WIPE_CHUNK_PERCENT)
7914
      # we _must_ make this an int, otherwise rounding errors will
7915
      # occur
7916
      wipe_chunk_size = int(wipe_chunk_size)
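      # Illustrative example (the real values come from constants.py): if
      # MAX_WIPE_CHUNK were 1024 MiB and MIN_WIPE_CHUNK_PERCENT were 10, a
      # 4096 MiB disk would be wiped in chunks of
      # min(1024, 4096 / 100.0 * 10) = 409 MiB after the int() truncation.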
7917

    
7918
      lu.LogInfo("* Wiping disk %d", idx)
7919
      logging.info("Wiping disk %d for instance %s, node %s using"
7920
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7921

    
7922
      offset = 0
7923
      size = device.size
7924
      last_output = 0
7925
      start_time = time.time()
7926

    
7927
      while offset < size:
7928
        wipe_size = min(wipe_chunk_size, size - offset)
7929
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7930
                      idx, offset, wipe_size)
7931
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7932
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7933
                     (idx, offset, wipe_size))
7934
        now = time.time()
7935
        offset += wipe_size
7936
        if now - last_output >= 60:
7937
          eta = _CalcEta(now - start_time, offset, size)
7938
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7939
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7940
          last_output = now
7941
  finally:
7942
    logging.info("Resume sync of instance %s disks", instance.name)
7943

    
7944
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7945

    
7946
    for idx, success in enumerate(result.payload):
7947
      if not success:
7948
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7949
                      " look at the status and troubleshoot the issue", idx)
7950
        logging.warn("resume-sync of instance %s for disks %d failed",
7951
                     instance.name, idx)
7952

    
7953

    
7954
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7955
  """Create all disks for an instance.
7956

7957
  This abstracts away some work from AddInstance.
7958

7959
  @type lu: L{LogicalUnit}
7960
  @param lu: the logical unit on whose behalf we execute
7961
  @type instance: L{objects.Instance}
7962
  @param instance: the instance whose disks we should create
7963
  @type to_skip: list
7964
  @param to_skip: list of indices to skip
7965
  @type target_node: string
7966
  @param target_node: if passed, overrides the target node for creation
7967
  @rtype: boolean
7968
  @return: the success of the creation
7969

7970
  """
7971
  info = _GetInstanceInfoText(instance)
7972
  if target_node is None:
7973
    pnode = instance.primary_node
7974
    all_nodes = instance.all_nodes
7975
  else:
7976
    pnode = target_node
7977
    all_nodes = [pnode]
7978

    
7979
  if instance.disk_template in constants.DTS_FILEBASED:
7980
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7981
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7982

    
7983
    result.Raise("Failed to create directory '%s' on"
7984
                 " node %s" % (file_storage_dir, pnode))
7985

    
7986
  # Note: this needs to be kept in sync with adding of disks in
7987
  # LUInstanceSetParams
7988
  for idx, device in enumerate(instance.disks):
7989
    if to_skip and idx in to_skip:
7990
      continue
7991
    logging.info("Creating volume %s for instance %s",
7992
                 device.iv_name, instance.name)
7993
    #HARDCODE
7994
    for node in all_nodes:
7995
      f_create = node == pnode
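      # Descriptive note: f_create doubles as both force_create and force_open
      # below, so the full device tree is only forced into existence (and
      # opened) on the primary/target node; on other nodes creation is left to
      # each device type's CreateOnSecondary() decision.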
7996
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7997

    
7998

    
7999
def _RemoveDisks(lu, instance, target_node=None):
8000
  """Remove all disks for an instance.
8001

8002
  This abstracts away some work from `AddInstance()` and
8003
  `RemoveInstance()`. Note that in case some of the devices couldn't
8004
  be removed, the removal will continue with the other ones (compare
8005
  with `_CreateDisks()`).
8006

8007
  @type lu: L{LogicalUnit}
8008
  @param lu: the logical unit on whose behalf we execute
8009
  @type instance: L{objects.Instance}
8010
  @param instance: the instance whose disks we should remove
8011
  @type target_node: string
8012
  @param target_node: used to override the node on which to remove the disks
8013
  @rtype: boolean
8014
  @return: the success of the removal
8015

8016
  """
8017
  logging.info("Removing block devices for instance %s", instance.name)
8018

    
8019
  all_result = True
8020
  for device in instance.disks:
8021
    if target_node:
8022
      edata = [(target_node, device)]
8023
    else:
8024
      edata = device.ComputeNodeTree(instance.primary_node)
8025
    for node, disk in edata:
8026
      lu.cfg.SetDiskID(disk, node)
8027
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8028
      if msg:
8029
        lu.LogWarning("Could not remove block device %s on node %s,"
8030
                      " continuing anyway: %s", device.iv_name, node, msg)
8031
        all_result = False
8032

    
8033
    # if this is a DRBD disk, return its port to the pool
8034
    if device.dev_type in constants.LDS_DRBD:
8035
      tcp_port = device.logical_id[2]
8036
      lu.cfg.AddTcpUdpPort(tcp_port)
8037

    
8038
  if instance.disk_template == constants.DT_FILE:
8039
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8040
    if target_node:
8041
      tgt = target_node
8042
    else:
8043
      tgt = instance.primary_node
8044
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8045
    if result.fail_msg:
8046
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8047
                    file_storage_dir, tgt, result.fail_msg)
8048
      all_result = False
8049

    
8050
  return all_result
8051

    
8052

    
8053
def _ComputeDiskSizePerVG(disk_template, disks):
8054
  """Compute disk size requirements in the volume group
8055

8056
  """
8057
  def _compute(disks, payload):
8058
    """Universal algorithm.
8059

8060
    """
8061
    vgs = {}
8062
    for disk in disks:
8063
      vgs[disk[constants.IDISK_VG]] = \
8064
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8065

    
8066
    return vgs
8067

    
8068
  # Required free disk space as a function of disk and swap space
8069
  req_size_dict = {
8070
    constants.DT_DISKLESS: {},
8071
    constants.DT_PLAIN: _compute(disks, 0),
8072
    # 128 MB are added for drbd metadata for each disk
8073
    constants.DT_DRBD8: _compute(disks, 128),
8074
    constants.DT_FILE: {},
8075
    constants.DT_SHARED_FILE: {},
8076
  }
8077

    
8078
  if disk_template not in req_size_dict:
8079
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8080
                                 " is unknown" % disk_template)
8081

    
8082
  return req_size_dict[disk_template]
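
# Hedged worked example: for two DT_DRBD8 disks of 1024 MB and 2048 MB that
# both live in a volume group named "xenvg" (name illustrative), the result is
# {"xenvg": 1024 + 128 + 2048 + 128} = {"xenvg": 3328}, i.e. 128 MB of DRBD
# metadata is budgeted per disk on top of the requested sizes.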
8083

    
8084

    
8085
def _ComputeDiskSize(disk_template, disks):
8086
  """Compute disk size requirements in the volume group
8087

8088
  """
8089
  # Required free disk space as a function of disk and swap space
8090
  req_size_dict = {
8091
    constants.DT_DISKLESS: None,
8092
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8093
    # 128 MB are added for drbd metadata for each disk
8094
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8095
    constants.DT_FILE: None,
8096
    constants.DT_SHARED_FILE: 0,
8097
    constants.DT_BLOCK: 0,
8098
  }
8099

    
8100
  if disk_template not in req_size_dict:
8101
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8102
                                 " is unknown" % disk_template)
8103

    
8104
  return req_size_dict[disk_template]
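
# Hedged example: for the same two 1024 MB and 2048 MB disks, DT_PLAIN needs
# 1024 + 2048 = 3072 and DT_DRBD8 needs 3328, while DT_DISKLESS and DT_FILE
# return None and the shared-file/block templates need no volume group space.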
8105

    
8106

    
8107
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
8120

    
8121

    
8122
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8123
  """Hypervisor parameter validation.
8124

8125
  This function abstract the hypervisor parameter validation to be
8126
  used in both instance create and instance modify.
8127

8128
  @type lu: L{LogicalUnit}
8129
  @param lu: the logical unit for which we check
8130
  @type nodenames: list
8131
  @param nodenames: the list of nodes on which we should check
8132
  @type hvname: string
8133
  @param hvname: the name of the hypervisor we should use
8134
  @type hvparams: dict
8135
  @param hvparams: the parameters which we need to check
8136
  @raise errors.OpPrereqError: if the parameters are not valid
8137

8138
  """
8139
  nodenames = _FilterVmNodes(lu, nodenames)
8140
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8141
                                                  hvname,
8142
                                                  hvparams)
8143
  for node in nodenames:
8144
    info = hvinfo[node]
8145
    if info.offline:
8146
      continue
8147
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8148

    
8149

    
8150
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8151
  """OS parameters validation.
8152

8153
  @type lu: L{LogicalUnit}
8154
  @param lu: the logical unit for which we check
8155
  @type required: boolean
8156
  @param required: whether the validation should fail if the OS is not
8157
      found
8158
  @type nodenames: list
8159
  @param nodenames: the list of nodes on which we should check
8160
  @type osname: string
8161
  @param osname: the name of the OS we should use
8162
  @type osparams: dict
8163
  @param osparams: the parameters which we need to check
8164
  @raise errors.OpPrereqError: if the parameters are not valid
8165

8166
  """
8167
  nodenames = _FilterVmNodes(lu, nodenames)
8168
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8169
                                   [constants.OS_VALIDATE_PARAMETERS],
8170
                                   osparams)
8171
  for node, nres in result.items():
8172
    # we don't check for offline cases since this should be run only
8173
    # against the master node and/or an instance's nodes
8174
    nres.Raise("OS Parameters validation failed on node %s" % node)
8175
    if not nres.payload:
8176
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8177
                 osname, node)
8178

    
8179

    
8180
class LUInstanceCreate(LogicalUnit):
8181
  """Create an instance.
8182

8183
  """
8184
  HPATH = "instance-add"
8185
  HTYPE = constants.HTYPE_INSTANCE
8186
  REQ_BGL = False
8187

    
8188
  def CheckArguments(self):
8189
    """Check arguments.
8190

8191
    """
8192
    # do not require name_check to ease forward/backward compatibility
8193
    # for tools
8194
    if self.op.no_install and self.op.start:
8195
      self.LogInfo("No-installation mode selected, disabling startup")
8196
      self.op.start = False
8197
    # validate/normalize the instance name
8198
    self.op.instance_name = \
8199
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8200

    
8201
    if self.op.ip_check and not self.op.name_check:
8202
      # TODO: make the ip check more flexible and not depend on the name check
8203
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8204
                                 " check", errors.ECODE_INVAL)
8205

    
8206
    # check nics' parameter names
8207
    for nic in self.op.nics:
8208
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8209

    
8210
    # check disks. parameter names and consistent adopt/no-adopt strategy
8211
    has_adopt = has_no_adopt = False
8212
    for disk in self.op.disks:
8213
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8214
      if constants.IDISK_ADOPT in disk:
8215
        has_adopt = True
8216
      else:
8217
        has_no_adopt = True
8218
    if has_adopt and has_no_adopt:
8219
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8220
                                 errors.ECODE_INVAL)
8221
    if has_adopt:
8222
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8223
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8224
                                   " '%s' disk template" %
8225
                                   self.op.disk_template,
8226
                                   errors.ECODE_INVAL)
8227
      if self.op.iallocator is not None:
8228
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8229
                                   " iallocator script", errors.ECODE_INVAL)
8230
      if self.op.mode == constants.INSTANCE_IMPORT:
8231
        raise errors.OpPrereqError("Disk adoption not allowed for"
8232
                                   " instance import", errors.ECODE_INVAL)
8233
    else:
8234
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8235
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8236
                                   " but no 'adopt' parameter given" %
8237
                                   self.op.disk_template,
8238
                                   errors.ECODE_INVAL)
8239

    
8240
    self.adopt_disks = has_adopt
8241

    
8242
    # instance name verification
8243
    if self.op.name_check:
8244
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8245
      self.op.instance_name = self.hostname1.name
8246
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

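    # With an iallocator the target nodes are not known yet, so all node
    # locks have to be acquired; otherwise only the explicitly requested
    # primary (and optional secondary) node is locked.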
    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
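    # Build an allocation request from the instance specification; the
    # chosen iallocator script then decides the primary (and, for mirrored
    # disk templates, the secondary) node.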
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

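    # Merge in the generic per-instance hook environment (name, nodes, NICs,
    # disks and the filled parameter dicts) on top of the mode-specific
    # variables set above.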
    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

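    # If no source node was given, scan the export lists of all locked nodes
    # for the requested (relative) export path and adopt the first match.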
    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

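    # Rebuild the NIC definitions from the numbered nic<N>_* options in the
    # export, unless NICs were specified explicitly in the opcode.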
    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

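      # Resulting path:
      # <cluster file storage dir>[/<requested subdir>]/<instance name>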
      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
8648
    """Check prerequisites.
8649

8650
    """
8651
    self._CalculateFileStorageDir()
8652

    
8653
    if self.op.mode == constants.INSTANCE_IMPORT:
8654
      export_info = self._ReadExportInfo()
8655
      self._ReadExportParams(export_info)
8656

    
8657
    if (not self.cfg.GetVGName() and
8658
        self.op.disk_template not in constants.DTS_NOT_LVM):
8659
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8660
                                 " instances", errors.ECODE_STATE)
8661

    
8662
    if self.op.hypervisor is None:
8663
      self.op.hypervisor = self.cfg.GetHypervisorType()
8664

    
8665
    cluster = self.cfg.GetClusterInfo()
8666
    enabled_hvs = cluster.enabled_hypervisors
8667
    if self.op.hypervisor not in enabled_hvs:
8668
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8669
                                 " cluster (%s)" % (self.op.hypervisor,
8670
                                  ",".join(enabled_hvs)),
8671
                                 errors.ECODE_STATE)
8672

    
8673
    # Check tag validity
8674
    for tag in self.op.tags:
8675
      objects.TaggableObject.ValidateTag(tag)
8676

    
8677
    # check hypervisor parameter syntax (locally)
8678
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8679
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8680
                                      self.op.hvparams)
8681
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8682
    hv_type.CheckParameterSyntax(filled_hvp)
8683
    self.hv_full = filled_hvp
8684
    # check that we don't specify global parameters on an instance
8685
    _CheckGlobalHvParams(self.op.hvparams)
8686

    
8687
    # fill and remember the beparams dict
8688
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8689
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8690

    
8691
    # build os parameters
8692
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8693

    
8694
    # now that hvp/bep are in final format, let's reset to defaults,
8695
    # if told to do so
8696
    if self.op.identify_defaults:
8697
      self._RevertToDefaults(cluster)
8698

    
8699
    # NIC buildup
8700
    self.nics = []
8701
    for idx, nic in enumerate(self.op.nics):
8702
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8703
      nic_mode = nic_mode_req
8704
      if nic_mode is None:
8705
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8706

    
8707
      # in routed mode, for the first nic, the default ip is 'auto'
8708
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8709
        default_ip_mode = constants.VALUE_AUTO
8710
      else:
8711
        default_ip_mode = constants.VALUE_NONE
8712

    
8713
      # ip validity checks
8714
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8715
      if ip is None or ip.lower() == constants.VALUE_NONE:
8716
        nic_ip = None
8717
      elif ip.lower() == constants.VALUE_AUTO:
8718
        if not self.op.name_check:
8719
          raise errors.OpPrereqError("IP address set to auto but name checks"
8720
                                     " have been skipped",
8721
                                     errors.ECODE_INVAL)
8722
        nic_ip = self.hostname1.ip
8723
      else:
8724
        if not netutils.IPAddress.IsValid(ip):
8725
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8726
                                     errors.ECODE_INVAL)
8727
        nic_ip = ip
8728

    
8729
      # TODO: check the ip address for uniqueness
8730
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8731
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8732
                                   errors.ECODE_INVAL)
8733

    
8734
      # MAC address verification
8735
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8736
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8737
        mac = utils.NormalizeAndValidateMac(mac)
8738

    
8739
        try:
8740
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8741
        except errors.ReservationError:
8742
          raise errors.OpPrereqError("MAC address %s already in use"
8743
                                     " in cluster" % mac,
8744
                                     errors.ECODE_NOTUNIQUE)
8745

    
8746
      #  Build nic parameters
8747
      link = nic.get(constants.INIC_LINK, None)
8748
      nicparams = {}
8749
      if nic_mode_req:
8750
        nicparams[constants.NIC_MODE] = nic_mode_req
8751
      if link:
8752
        nicparams[constants.NIC_LINK] = link
8753

    
8754
      check_params = cluster.SimpleFillNIC(nicparams)
8755
      objects.NIC.CheckParameterSyntax(check_params)
8756
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8757

    
8758
    # disk checks/pre-build
8759
    default_vg = self.cfg.GetVGName()
8760
    self.disks = []
8761
    for disk in self.op.disks:
8762
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8763
      if mode not in constants.DISK_ACCESS_SET:
8764
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8765
                                   mode, errors.ECODE_INVAL)
8766
      size = disk.get(constants.IDISK_SIZE, None)
8767
      if size is None:
8768
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8769
      try:
8770
        size = int(size)
8771
      except (TypeError, ValueError):
8772
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8773
                                   errors.ECODE_INVAL)
8774

    
8775
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8776
      new_disk = {
8777
        constants.IDISK_SIZE: size,
8778
        constants.IDISK_MODE: mode,
8779
        constants.IDISK_VG: data_vg,
8780
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8781
        }
8782
      if constants.IDISK_ADOPT in disk:
8783
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8784
      self.disks.append(new_disk)
8785

    
8786
    if self.op.mode == constants.INSTANCE_IMPORT:
8787

    
8788
      # Check that the new instance doesn't have less disks than the export
8789
      instance_disks = len(self.disks)
8790
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8791
      if instance_disks < export_disks:
8792
        raise errors.OpPrereqError("Not enough disks to import."
8793
                                   " (instance: %d, export: %d)" %
8794
                                   (instance_disks, export_disks),
8795
                                   errors.ECODE_INVAL)
8796

    
8797
      disk_images = []
8798
      for idx in range(export_disks):
8799
        option = "disk%d_dump" % idx
8800
        if export_info.has_option(constants.INISECT_INS, option):
8801
          # FIXME: are the old os-es, disk sizes, etc. useful?
8802
          export_name = export_info.get(constants.INISECT_INS, option)
8803
          image = utils.PathJoin(self.op.src_path, export_name)
8804
          disk_images.append(image)
8805
        else:
8806
          disk_images.append(False)
8807

    
8808
      self.src_images = disk_images
8809

    
8810
      old_name = export_info.get(constants.INISECT_INS, "name")
8811
      try:
8812
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8813
      except (TypeError, ValueError), err:
8814
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8815
                                   " an integer: %s" % str(err),
8816
                                   errors.ECODE_STATE)
8817
      if self.op.instance_name == old_name:
8818
        for idx, nic in enumerate(self.nics):
8819
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8820
            nic_mac_ini = "nic%d_mac" % idx
8821
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8822

    
8823
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8824

    
8825
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8826
    if self.op.ip_check:
8827
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8828
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8829
                                   (self.check_ip, self.op.instance_name),
8830
                                   errors.ECODE_NOTUNIQUE)
8831

    
8832
    #### mac address generation
8833
    # By generating here the mac address both the allocator and the hooks get
8834
    # the real final mac address rather than the 'auto' or 'generate' value.
8835
    # There is a race condition between the generation and the instance object
8836
    # creation, which means that we know the mac is valid now, but we're not
8837
    # sure it will be when we actually add the instance. If things go bad
8838
    # adding the instance will abort because of a duplicate mac, and the
8839
    # creation job will fail.
8840
    for nic in self.nics:
8841
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8842
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8843

    
8844
    #### allocator run
8845

    
8846
    if self.op.iallocator is not None:
8847
      self._RunAllocator()
8848

    
8849
    # Release all unneeded node locks
8850
    _ReleaseLocks(self, locking.LEVEL_NODE,
8851
                  keep=filter(None, [self.op.pnode, self.op.snode,
8852
                                     self.op.src_node]))
8853

    
8854
    #### node related checks
8855

    
8856
    # check primary node
8857
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8858
    assert self.pnode is not None, \
8859
      "Cannot retrieve locked node %s" % self.op.pnode
8860
    if pnode.offline:
8861
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8862
                                 pnode.name, errors.ECODE_STATE)
8863
    if pnode.drained:
8864
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8865
                                 pnode.name, errors.ECODE_STATE)
8866
    if not pnode.vm_capable:
8867
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8868
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8869

    
8870
    self.secondaries = []
8871

    
8872
    # mirror node verification
8873
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8874
      if self.op.snode == pnode.name:
8875
        raise errors.OpPrereqError("The secondary node cannot be the"
8876
                                   " primary node", errors.ECODE_INVAL)
8877
      _CheckNodeOnline(self, self.op.snode)
8878
      _CheckNodeNotDrained(self, self.op.snode)
8879
      _CheckNodeVmCapable(self, self.op.snode)
8880
      self.secondaries.append(self.op.snode)
8881

    
8882
    nodenames = [pnode.name] + self.secondaries
8883

    
8884
    if not self.adopt_disks:
8885
      # Check lv size requirements, if not adopting
8886
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8887
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8888

    
8889
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8890
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8891
                                disk[constants.IDISK_ADOPT])
8892
                     for disk in self.disks])
8893
      if len(all_lvs) != len(self.disks):
8894
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8895
                                   errors.ECODE_INVAL)
8896
      for lv_name in all_lvs:
8897
        try:
8898
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8899
          # to ReserveLV uses the same syntax
8900
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8901
        except errors.ReservationError:
8902
          raise errors.OpPrereqError("LV named %s used by another instance" %
8903
                                     lv_name, errors.ECODE_NOTUNIQUE)
8904

    
8905
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8906
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8907

    
8908
      node_lvs = self.rpc.call_lv_list([pnode.name],
8909
                                       vg_names.payload.keys())[pnode.name]
8910
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8911
      node_lvs = node_lvs.payload
8912

    
8913
      delta = all_lvs.difference(node_lvs.keys())
8914
      if delta:
8915
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8916
                                   utils.CommaJoin(delta),
8917
                                   errors.ECODE_INVAL)
8918
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8919
      if online_lvs:
8920
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8921
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8922
                                   errors.ECODE_STATE)
8923
      # update the size of disk based on what is found
8924
      for dsk in self.disks:
8925
        dsk[constants.IDISK_SIZE] = \
8926
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8927
                                        dsk[constants.IDISK_ADOPT])][0]))
8928

    
8929
    elif self.op.disk_template == constants.DT_BLOCK:
8930
      # Normalize and de-duplicate device paths
8931
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8932
                       for disk in self.disks])
8933
      if len(all_disks) != len(self.disks):
8934
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8935
                                   errors.ECODE_INVAL)
8936
      baddisks = [d for d in all_disks
8937
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8938
      if baddisks:
8939
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8940
                                   " cannot be adopted" %
8941
                                   (", ".join(baddisks),
8942
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8943
                                   errors.ECODE_INVAL)
8944

    
8945
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8946
                                            list(all_disks))[pnode.name]
8947
      node_disks.Raise("Cannot get block device information from node %s" %
8948
                       pnode.name)
8949
      node_disks = node_disks.payload
8950
      delta = all_disks.difference(node_disks.keys())
8951
      if delta:
8952
        raise errors.OpPrereqError("Missing block device(s): %s" %
8953
                                   utils.CommaJoin(delta),
8954
                                   errors.ECODE_INVAL)
8955
      for dsk in self.disks:
8956
        dsk[constants.IDISK_SIZE] = \
8957
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8958

    
8959
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8960

    
8961
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8962
    # check OS parameters (remotely)
8963
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8964

    
8965
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8966

    
8967
    # memory check on primary node
8968
    if self.op.start:
8969
      _CheckNodeFreeMemory(self, self.pnode.name,
8970
                           "creating instance %s" % self.op.instance_name,
8971
                           self.be_full[constants.BE_MEMORY],
8972
                           self.op.hypervisor)
8973

    
8974
    self.dry_run_result = list(nodenames)
8975

    
8976
  def Exec(self, feedback_fn):
8977
    """Create and add the instance to the cluster.
8978

8979
    """
8980
    instance = self.op.instance_name
8981
    pnode_name = self.pnode.name
8982

    
8983
    ht_kind = self.op.hypervisor
8984
    if ht_kind in constants.HTS_REQ_PORT:
8985
      network_port = self.cfg.AllocatePort()
8986
    else:
8987
      network_port = None
8988

    
8989
    disks = _GenerateDiskTemplate(self,
8990
                                  self.op.disk_template,
8991
                                  instance, pnode_name,
8992
                                  self.secondaries,
8993
                                  self.disks,
8994
                                  self.instance_file_storage_dir,
8995
                                  self.op.file_driver,
8996
                                  0,
8997
                                  feedback_fn)
8998

    
8999
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9000
                            primary_node=pnode_name,
9001
                            nics=self.nics, disks=disks,
9002
                            disk_template=self.op.disk_template,
9003
                            admin_up=False,
9004
                            network_port=network_port,
9005
                            beparams=self.op.beparams,
9006
                            hvparams=self.op.hvparams,
9007
                            hypervisor=self.op.hypervisor,
9008
                            osparams=self.op.osparams,
9009
                            )
9010

    
9011
    if self.op.tags:
9012
      for tag in self.op.tags:
9013
        iobj.AddTag(tag)
9014

    
9015
    if self.adopt_disks:
9016
      if self.op.disk_template == constants.DT_PLAIN:
9017
        # rename LVs to the newly-generated names; we need to construct
9018
        # 'fake' LV disks with the old data, plus the new unique_id
9019
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9020
        rename_to = []
9021
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9022
          rename_to.append(t_dsk.logical_id)
9023
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9024
          self.cfg.SetDiskID(t_dsk, pnode_name)
9025
        result = self.rpc.call_blockdev_rename(pnode_name,
9026
                                               zip(tmp_disks, rename_to))
9027
        result.Raise("Failed to rename adopted LVs")
9028
    else:
9029
      feedback_fn("* creating instance disks...")
9030
      try:
9031
        _CreateDisks(self, iobj)
9032
      except errors.OpExecError:
9033
        self.LogWarning("Device creation failed, reverting...")
9034
        try:
9035
          _RemoveDisks(self, iobj)
9036
        finally:
9037
          self.cfg.ReleaseDRBDMinors(instance)
9038
          raise
9039

    
9040
    feedback_fn("adding instance %s to cluster config" % instance)
9041

    
9042
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9043

    
9044
    # Declare that we don't want to remove the instance lock anymore, as we've
9045
    # added the instance to the config
9046
    del self.remove_locks[locking.LEVEL_INSTANCE]
9047

    
9048
    if self.op.mode == constants.INSTANCE_IMPORT:
9049
      # Release unused nodes
9050
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9051
    else:
9052
      # Release all nodes
9053
      _ReleaseLocks(self, locking.LEVEL_NODE)
9054

    
9055
    disk_abort = False
9056
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9057
      feedback_fn("* wiping instance disks...")
9058
      try:
9059
        _WipeDisks(self, iobj)
9060
      except errors.OpExecError, err:
9061
        logging.exception("Wiping disks failed")
9062
        self.LogWarning("Wiping instance disks failed (%s)", err)
9063
        disk_abort = True
9064

    
9065
    if disk_abort:
9066
      # Something is already wrong with the disks, don't do anything else
9067
      pass
9068
    elif self.op.wait_for_sync:
9069
      disk_abort = not _WaitForSync(self, iobj)
9070
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9071
      # make sure the disks are not degraded (still sync-ing is ok)
9072
      feedback_fn("* checking mirrors status")
9073
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9074
    else:
9075
      disk_abort = False
9076

    
9077
    if disk_abort:
9078
      _RemoveDisks(self, iobj)
9079
      self.cfg.RemoveInstance(iobj.name)
9080
      # Make sure the instance lock gets removed
9081
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9082
      raise errors.OpExecError("There are some degraded disks for"
9083
                               " this instance")
9084

    
9085
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9086
      if self.op.mode == constants.INSTANCE_CREATE:
9087
        if not self.op.no_install:
9088
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9089
                        not self.op.wait_for_sync)
9090
          if pause_sync:
9091
            feedback_fn("* pausing disk sync to install instance OS")
9092
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9093
                                                              iobj.disks, True)
9094
            for idx, success in enumerate(result.payload):
9095
              if not success:
9096
                logging.warn("pause-sync of instance %s for disk %d failed",
9097
                             instance, idx)
9098

    
9099
          feedback_fn("* running the instance OS create scripts...")
9100
          # FIXME: pass debug option from opcode to backend
9101
          os_add_result = \
9102
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
9103
                                          self.op.debug_level)
9104
          if pause_sync:
9105
            feedback_fn("* resuming disk sync")
9106
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9107
                                                              iobj.disks, False)
9108
            for idx, success in enumerate(result.payload):
9109
              if not success:
9110
                logging.warn("resume-sync of instance %s for disk %d failed",
9111
                             instance, idx)
9112

    
9113
          os_add_result.Raise("Could not add os for instance %s"
9114
                              " on node %s" % (instance, pnode_name))
9115

    
9116
      elif self.op.mode == constants.INSTANCE_IMPORT:
9117
        feedback_fn("* running the instance OS import scripts...")
9118

    
9119
        transfers = []
9120

    
9121
        for idx, image in enumerate(self.src_images):
9122
          if not image:
9123
            continue
9124

    
9125
          # FIXME: pass debug option from opcode to backend
9126
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9127
                                             constants.IEIO_FILE, (image, ),
9128
                                             constants.IEIO_SCRIPT,
9129
                                             (iobj.disks[idx], idx),
9130
                                             None)
9131
          transfers.append(dt)
9132

    
9133
        import_result = \
9134
          masterd.instance.TransferInstanceData(self, feedback_fn,
9135
                                                self.op.src_node, pnode_name,
9136
                                                self.pnode.secondary_ip,
9137
                                                iobj, transfers)
9138
        if not compat.all(import_result):
9139
          self.LogWarning("Some disks for instance %s on node %s were not"
9140
                          " imported successfully" % (instance, pnode_name))
9141

    
9142
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9143
        feedback_fn("* preparing remote import...")
9144
        # The source cluster will stop the instance before attempting to make a
9145
        # connection. In some cases stopping an instance can take a long time,
9146
        # hence the shutdown timeout is added to the connection timeout.
9147
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9148
                           self.op.source_shutdown_timeout)
9149
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9150

    
9151
        assert iobj.primary_node == self.pnode.name
9152
        disk_results = \
9153
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9154
                                        self.source_x509_ca,
9155
                                        self._cds, timeouts)
9156
        if not compat.all(disk_results):
9157
          # TODO: Should the instance still be started, even if some disks
9158
          # failed to import (valid for local imports, too)?
9159
          self.LogWarning("Some disks for instance %s on node %s were not"
9160
                          " imported successfully" % (instance, pnode_name))
9161

    
9162
        # Run rename script on newly imported instance
9163
        assert iobj.name == instance
9164
        feedback_fn("Running rename script for %s" % instance)
9165
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9166
                                                   self.source_instance_name,
9167
                                                   self.op.debug_level)
9168
        if result.fail_msg:
9169
          self.LogWarning("Failed to run rename script for %s on node"
9170
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9171

    
9172
      else:
9173
        # also checked in the prereq part
9174
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9175
                                     % self.op.mode)
9176

    
9177
    if self.op.start:
9178
      iobj.admin_up = True
9179
      self.cfg.Update(iobj, feedback_fn)
9180
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9181
      feedback_fn("* starting instance...")
9182
      result = self.rpc.call_instance_start(pnode_name, iobj,
9183
                                            None, None, False)
9184
      result.Raise("Could not start instance")
9185

    
9186
    return list(iobj.all_nodes)
9187

    
9188

    
9189
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

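    # A console only exists while the instance is running on its primary
    # node; distinguish an unexpected crash from an administrative shutdown
    # in the error message.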
    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
9259
  """Replace the disks of an instance.
9260

9261
  """
9262
  HPATH = "mirrors-replace"
9263
  HTYPE = constants.HTYPE_INSTANCE
9264
  REQ_BGL = False
9265

    
9266
  def CheckArguments(self):
9267
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9268
                                  self.op.iallocator)
9269

    
9270
  def ExpandNames(self):
9271
    self._ExpandAndLockInstance()
9272

    
9273
    assert locking.LEVEL_NODE not in self.needed_locks
9274
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9275

    
9276
    assert self.op.iallocator is None or self.op.remote_node is None, \
9277
      "Conflicting options"
9278

    
9279
    if self.op.remote_node is not None:
9280
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9281

    
9282
      # Warning: do not remove the locking of the new secondary here
9283
      # unless DRBD8.AddChildren is changed to work in parallel;
9284
      # currently it doesn't since parallel invocations of
9285
      # FindUnusedMinor will conflict
9286
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9287
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9288
    else:
9289
      self.needed_locks[locking.LEVEL_NODE] = []
9290
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9291

    
9292
      if self.op.iallocator is not None:
9293
        # iallocator will select a new node in the same group
9294
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9295

    
9296
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9297
                                   self.op.iallocator, self.op.remote_node,
9298
                                   self.op.disks, False, self.op.early_release)
9299

    
9300
    self.tasklets = [self.replacer]
9301

    
9302
  def DeclareLocks(self, level):
9303
    if level == locking.LEVEL_NODEGROUP:
9304
      assert self.op.remote_node is None
9305
      assert self.op.iallocator is not None
9306
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9307

    
9308
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9309
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9310
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9311

    
9312
    elif level == locking.LEVEL_NODE:
9313
      if self.op.iallocator is not None:
9314
        assert self.op.remote_node is None
9315
        assert not self.needed_locks[locking.LEVEL_NODE]
9316

    
9317
        # Lock member nodes of all locked groups
9318
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9319
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9320
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9321
      else:
9322
        self._LockInstancesNodes()
9323

    
9324
  def BuildHooksEnv(self):
9325
    """Build hooks env.
9326

9327
    This runs on the master, the primary and all the secondaries.
9328

9329
    """
9330
    instance = self.replacer.instance
9331
    env = {
9332
      "MODE": self.op.mode,
9333
      "NEW_SECONDARY": self.op.remote_node,
9334
      "OLD_SECONDARY": instance.secondary_nodes[0],
9335
      }
9336
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9337
    return env
9338

    
9339
  def BuildHooksNodes(self):
9340
    """Build hooks nodes.
9341

9342
    """
9343
    instance = self.replacer.instance
9344
    nl = [
9345
      self.cfg.GetMasterNode(),
9346
      instance.primary_node,
9347
      ]
9348
    if self.op.remote_node is not None:
9349
      nl.append(self.op.remote_node)
9350
    return nl, nl
9351

    
9352
  def CheckPrereq(self):
9353
    """Check prerequisites.
9354

9355
    """
9356
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9357
            self.op.iallocator is None)
9358

    
9359
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9360
    if owned_groups:
9361
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9362

    
9363
    return LogicalUnit.CheckPrereq(self)
9364

    
9365

    
9366
class TLReplaceDisks(Tasklet):
9367
  """Replaces disks for an instance.
9368

9369
  Note: Locking is not within the scope of this class.
9370

9371
  """
9372
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9373
               disks, delay_iallocator, early_release):
9374
    """Initializes this class.
9375

9376
    """
9377
    Tasklet.__init__(self, lu)
9378

    
9379
    # Parameters
9380
    self.instance_name = instance_name
9381
    self.mode = mode
9382
    self.iallocator_name = iallocator_name
9383
    self.remote_node = remote_node
9384
    self.disks = disks
9385
    self.delay_iallocator = delay_iallocator
9386
    self.early_release = early_release
9387

    
9388
    # Runtime data
9389
    self.instance = None
9390
    self.new_node = None
9391
    self.target_node = None
9392
    self.other_node = None
9393
    self.remote_node_info = None
9394
    self.node_secondary_ip = None
9395

    
9396
  @staticmethod
9397
  def CheckArguments(mode, remote_node, iallocator):
9398
    """Helper function for users of this class.
9399

9400
    """
9401
    # check for valid parameter combination
9402
    if mode == constants.REPLACE_DISK_CHG:
9403
      if remote_node is None and iallocator is None:
9404
        raise errors.OpPrereqError("When changing the secondary either an"
9405
                                   " iallocator script must be used or the"
9406
                                   " new node given", errors.ECODE_INVAL)
9407

    
9408
      if remote_node is not None and iallocator is not None:
9409
        raise errors.OpPrereqError("Give either the iallocator or the new"
9410
                                   " secondary, not both", errors.ECODE_INVAL)
9411

    
9412
    elif remote_node is not None or iallocator is not None:
9413
      # Not replacing the secondary
9414
      raise errors.OpPrereqError("The iallocator and new node options can"
9415
                                 " only be used when changing the"
9416
                                 " secondary node", errors.ECODE_INVAL)
9417

    
9418
  @staticmethod
9419
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9420
    """Compute a new secondary node using an IAllocator.
9421

9422
    """
9423
    ial = IAllocator(lu.cfg, lu.rpc,
9424
                     mode=constants.IALLOCATOR_MODE_RELOC,
9425
                     name=instance_name,
9426
                     relocate_from=list(relocate_from))
9427

    
9428
    ial.Run(iallocator_name)
9429

    
9430
    if not ial.success:
9431
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9432
                                 " %s" % (iallocator_name, ial.info),
9433
                                 errors.ECODE_NORES)
9434

    
9435
    if len(ial.result) != ial.required_nodes:
9436
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9437
                                 " of nodes (%s), required %s" %
9438
                                 (iallocator_name,
9439
                                  len(ial.result), ial.required_nodes),
9440
                                 errors.ECODE_FAULT)
9441

    
9442
    remote_node_name = ial.result[0]
9443

    
9444
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9445
               instance_name, remote_node_name)
9446

    
9447
    return remote_node_name
9448

    
9449
  def _FindFaultyDisks(self, node_name):
9450
    """Wrapper for L{_FindFaultyInstanceDisks}.
9451

9452
    """
9453
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9454
                                    node_name, True)
9455

    
9456
  def _CheckDisksActivated(self, instance):
9457
    """Checks if the instance disks are activated.
9458

9459
    @param instance: The instance to check disks
9460
    @return: True if they are activated, False otherwise
9461

9462
    """
9463
    nodes = instance.all_nodes
9464

    
9465
    for idx, dev in enumerate(instance.disks):
9466
      for node in nodes:
9467
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9468
        self.cfg.SetDiskID(dev, node)
9469

    
9470
        result = self.rpc.call_blockdev_find(node, dev)
9471

    
9472
        if result.offline:
9473
          continue
9474
        elif result.fail_msg or not result.payload:
9475
          return False
9476

    
9477
    return True
9478

    
9479
  def CheckPrereq(self):
9480
    """Check prerequisites.
9481

9482
    This checks that the instance is in the cluster.
9483

9484
    """
9485
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9486
    assert instance is not None, \
9487
      "Cannot retrieve locked instance %s" % self.instance_name
9488

    
9489
    if instance.disk_template != constants.DT_DRBD8:
9490
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9491
                                 " instances", errors.ECODE_INVAL)
9492

    
9493
    if len(instance.secondary_nodes) != 1:
9494
      raise errors.OpPrereqError("The instance has a strange layout,"
9495
                                 " expected one secondary but found %d" %
9496
                                 len(instance.secondary_nodes),
9497
                                 errors.ECODE_FAULT)
9498

    
9499
    if not self.delay_iallocator:
9500
      self._CheckPrereq2()
9501

    
9502
  def _CheckPrereq2(self):
9503
    """Check prerequisites, second part.
9504

9505
    This function should always be part of CheckPrereq. It was separated and is
9506
    now called from Exec because during node evacuation iallocator was only
9507
    called with an unmodified cluster model, not taking planned changes into
9508
    account.
9509

9510
    """
9511
    instance = self.instance
9512
    secondary_node = instance.secondary_nodes[0]
9513

    
9514
    if self.iallocator_name is None:
9515
      remote_node = self.remote_node
9516
    else:
9517
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9518
                                       instance.name, instance.secondary_nodes)
9519

    
9520
    if remote_node is None:
9521
      self.remote_node_info = None
9522
    else:
9523
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9524
             "Remote node '%s' is not locked" % remote_node
9525

    
9526
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9527
      assert self.remote_node_info is not None, \
9528
        "Cannot retrieve locked node %s" % remote_node
9529

    
9530
    if remote_node == self.instance.primary_node:
9531
      raise errors.OpPrereqError("The specified node is the primary node of"
9532
                                 " the instance", errors.ECODE_INVAL)
9533

    
9534
    if remote_node == secondary_node:
9535
      raise errors.OpPrereqError("The specified node is already the"
9536
                                 " secondary node of the instance",
9537
                                 errors.ECODE_INVAL)
9538

    
9539
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9540
                                    constants.REPLACE_DISK_CHG):
9541
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9542
                                 errors.ECODE_INVAL)
9543

    
9544
    if self.mode == constants.REPLACE_DISK_AUTO:
9545
      if not self._CheckDisksActivated(instance):
9546
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9547
                                   " first" % self.instance_name,
9548
                                   errors.ECODE_STATE)
9549
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9550
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9551

    
9552
      if faulty_primary and faulty_secondary:
9553
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9554
                                   " one node and can not be repaired"
9555
                                   " automatically" % self.instance_name,
9556
                                   errors.ECODE_STATE)
9557

    
9558
      if faulty_primary:
9559
        self.disks = faulty_primary
9560
        self.target_node = instance.primary_node
9561
        self.other_node = secondary_node
9562
        check_nodes = [self.target_node, self.other_node]
9563
      elif faulty_secondary:
9564
        self.disks = faulty_secondary
9565
        self.target_node = secondary_node
9566
        self.other_node = instance.primary_node
9567
        check_nodes = [self.target_node, self.other_node]
9568
      else:
9569
        self.disks = []
9570
        check_nodes = []
9571

    
9572
    else:
9573
      # Non-automatic modes
9574
      if self.mode == constants.REPLACE_DISK_PRI:
9575
        self.target_node = instance.primary_node
9576
        self.other_node = secondary_node
9577
        check_nodes = [self.target_node, self.other_node]
9578

    
9579
      elif self.mode == constants.REPLACE_DISK_SEC:
9580
        self.target_node = secondary_node
9581
        self.other_node = instance.primary_node
9582
        check_nodes = [self.target_node, self.other_node]
9583

    
9584
      elif self.mode == constants.REPLACE_DISK_CHG:
9585
        self.new_node = remote_node
9586
        self.other_node = instance.primary_node
9587
        self.target_node = secondary_node
9588
        check_nodes = [self.new_node, self.other_node]
9589

    
9590
        _CheckNodeNotDrained(self.lu, remote_node)
9591
        _CheckNodeVmCapable(self.lu, remote_node)
9592

    
9593
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9594
        assert old_node_info is not None
9595
        if old_node_info.offline and not self.early_release:
9596
          # doesn't make sense to delay the release
9597
          self.early_release = True
9598
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9599
                          " early-release mode", secondary_node)
9600

    
9601
      else:
9602
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9603
                                     self.mode)
9604

    
9605
      # If not specified all disks should be replaced
9606
      if not self.disks:
9607
        self.disks = range(len(self.instance.disks))
9608

    
9609
    for node in check_nodes:
9610
      _CheckNodeOnline(self.lu, node)
9611

    
9612
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9613
                                                          self.other_node,
9614
                                                          self.target_node]
9615
                              if node_name is not None)
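    # Only nodes taking part in this particular replacement are kept here;
    # e.g. new_node is set solely in "change secondary" mode and stays None
    # otherwise, so the "is not None" filter drops it in the other modes.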

    
9617
    # Release unneeded node locks
9618
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9619

    
9620
    # Release any owned node group
9621
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9622
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9623

    
9624
    # Check whether disks are valid
9625
    for disk_idx in self.disks:
9626
      instance.FindDisk(disk_idx)
9627

    
9628
    # Get secondary node IP addresses
9629
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9630
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9631

    
9632
  def Exec(self, feedback_fn):
9633
    """Execute disk replacement.
9634

9635
    This dispatches the disk replacement to the appropriate handler.
9636

9637
    """
9638
    if self.delay_iallocator:
9639
      self._CheckPrereq2()
9640

    
9641
    if __debug__:
9642
      # Verify owned locks before starting operation
9643
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9644
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9645
          ("Incorrect node locks, owning %s, expected %s" %
9646
           (owned_nodes, self.node_secondary_ip.keys()))
9647

    
9648
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9649
      assert list(owned_instances) == [self.instance_name], \
9650
          "Instance '%s' not locked" % self.instance_name
9651

    
9652
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9653
          "Should not own any node group lock at this point"
9654

    
9655
    if not self.disks:
9656
      feedback_fn("No disks need replacement")
9657
      return
9658

    
9659
    feedback_fn("Replacing disk(s) %s for %s" %
9660
                (utils.CommaJoin(self.disks), self.instance.name))
9661

    
9662
    activate_disks = (not self.instance.admin_up)
9663

    
9664
    # Activate the instance disks if we're replacing them on a down instance
9665
    if activate_disks:
9666
      _StartInstanceDisks(self.lu, self.instance, True)
9667

    
9668
    try:
9669
      # Should we replace the secondary node?
9670
      if self.new_node is not None:
9671
        fn = self._ExecDrbd8Secondary
9672
      else:
9673
        fn = self._ExecDrbd8DiskOnly
9674

    
9675
      result = fn(feedback_fn)
9676
    finally:
9677
      # Deactivate the instance disks if we're replacing them on a
9678
      # down instance
9679
      if activate_disks:
9680
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9681

    
9682
    if __debug__:
9683
      # Verify owned locks
9684
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9685
      nodes = frozenset(self.node_secondary_ip)
9686
      assert ((self.early_release and not owned_nodes) or
9687
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9688
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9689
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9690

    
9691
    return result
9692

    
9693
  def _CheckVolumeGroup(self, nodes):
9694
    self.lu.LogInfo("Checking volume groups")
9695

    
9696
    vgname = self.cfg.GetVGName()
9697

    
9698
    # Make sure volume group exists on all involved nodes
9699
    results = self.rpc.call_vg_list(nodes)
9700
    if not results:
9701
      raise errors.OpExecError("Can't list volume groups on the nodes")
9702

    
9703
    for node in nodes:
9704
      res = results[node]
9705
      res.Raise("Error checking node %s" % node)
9706
      if vgname not in res.payload:
9707
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9708
                                 (vgname, node))
9709

    
9710
  def _CheckDisksExistence(self, nodes):
9711
    # Check disk existence
9712
    for idx, dev in enumerate(self.instance.disks):
9713
      if idx not in self.disks:
9714
        continue
9715

    
9716
      for node in nodes:
9717
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9718
        self.cfg.SetDiskID(dev, node)
9719

    
9720
        result = self.rpc.call_blockdev_find(node, dev)
9721

    
9722
        msg = result.fail_msg
9723
        if msg or not result.payload:
9724
          if not msg:
9725
            msg = "disk not found"
9726
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9727
                                   (idx, node, msg))
9728

    
9729
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9730
    for idx, dev in enumerate(self.instance.disks):
9731
      if idx not in self.disks:
9732
        continue
9733

    
9734
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9735
                      (idx, node_name))
9736

    
9737
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9738
                                   ldisk=ldisk):
9739
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9740
                                 " replace disks for instance %s" %
9741
                                 (node_name, self.instance.name))
9742

    
9743
  def _CreateNewStorage(self, node_name):
9744
    """Create new storage on the primary or secondary node.
9745

9746
    This is only used for same-node replaces, not for changing the
9747
    secondary node, hence we don't want to modify the existing disk.
9748

9749
    """
9750
    iv_names = {}
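    # Maps disk iv_name -> (drbd device, old LV children, newly created LVs);
    # the callers use this mapping to detach, rename and re-attach the LVs
    # and finally to remove the old ones.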

    
9752
    for idx, dev in enumerate(self.instance.disks):
9753
      if idx not in self.disks:
9754
        continue
9755

    
9756
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9757

    
9758
      self.cfg.SetDiskID(dev, node_name)
9759

    
9760
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9761
      names = _GenerateUniqueNames(self.lu, lv_names)
9762

    
9763
      vg_data = dev.children[0].logical_id[0]
9764
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9765
                             logical_id=(vg_data, names[0]))
9766
      vg_meta = dev.children[1].logical_id[0]
9767
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9768
                             logical_id=(vg_meta, names[1]))
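      # The metadata LV gets a fixed size of 128; disk sizes in this module
      # are in mebibytes, so this is the usual 128 MiB DRBD8 metadata volume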

    
9770
      new_lvs = [lv_data, lv_meta]
9771
      old_lvs = [child.Copy() for child in dev.children]
9772
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9773

    
9774
      # we pass force_create=True to force the LVM creation
9775
      for new_lv in new_lvs:
9776
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9777
                        _GetInstanceInfoText(self.instance), False)
9778

    
9779
    return iv_names
9780

    
9781
  def _CheckDevices(self, node_name, iv_names):
9782
    for name, (dev, _, _) in iv_names.iteritems():
9783
      self.cfg.SetDiskID(dev, node_name)
9784

    
9785
      result = self.rpc.call_blockdev_find(node_name, dev)
9786

    
9787
      msg = result.fail_msg
9788
      if msg or not result.payload:
9789
        if not msg:
9790
          msg = "disk not found"
9791
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9792
                                 (name, msg))
9793

    
9794
      if result.payload.is_degraded:
9795
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9796

    
9797
  def _RemoveOldStorage(self, node_name, iv_names):
9798
    for name, (_, old_lvs, _) in iv_names.iteritems():
9799
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9800

    
9801
      for lv in old_lvs:
9802
        self.cfg.SetDiskID(lv, node_name)
9803

    
9804
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9805
        if msg:
9806
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9807
                             hint="remove unused LVs manually")
9808

    
9809
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9810
    """Replace a disk on the primary or secondary for DRBD 8.
9811

9812
    The algorithm for replace is quite complicated:
9813

9814
      1. for each disk to be replaced:
9815

9816
        1. create new LVs on the target node with unique names
9817
        1. detach old LVs from the drbd device
9818
        1. rename old LVs to name_replaced.<time_t>
9819
        1. rename new LVs to old LVs
9820
        1. attach the new LVs (with the old names now) to the drbd device
9821

9822
      1. wait for sync across all devices
9823

9824
      1. for each modified disk:
9825

        1. remove old LVs (which have the name name_replaced.<time_t>)

9828
    Failures are not very well handled.
9829

9830
    """
9831
    steps_total = 6
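    # Note: with early_release the old storage is removed as step 5 and the
    # device sync becomes step 6; without it the sync runs first and removal
    # of the old storage happens last (see the cstep handling below).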

    
9833
    # Step: check device activation
9834
    self.lu.LogStep(1, steps_total, "Check device existence")
9835
    self._CheckDisksExistence([self.other_node, self.target_node])
9836
    self._CheckVolumeGroup([self.target_node, self.other_node])
9837

    
9838
    # Step: check other node consistency
9839
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9840
    self._CheckDisksConsistency(self.other_node,
9841
                                self.other_node == self.instance.primary_node,
9842
                                False)
9843

    
9844
    # Step: create new storage
9845
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9846
    iv_names = self._CreateNewStorage(self.target_node)
9847

    
9848
    # Step: for each lv, detach+rename*2+attach
9849
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9850
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9851
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9852

    
9853
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9854
                                                     old_lvs)
9855
      result.Raise("Can't detach drbd from local storage on node"
9856
                   " %s for device %s" % (self.target_node, dev.iv_name))
9857
      #dev.children = []
9858
      #cfg.Update(instance)
9859

    
9860
      # ok, we created the new LVs, so now we know we have the needed
9861
      # storage; as such, we proceed on the target node to rename
9862
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9863
      # using the assumption that logical_id == physical_id (which in
9864
      # turn is the unique_id on that node)
9865

    
9866
      # FIXME(iustin): use a better name for the replaced LVs
9867
      temp_suffix = int(time.time())
9868
      ren_fn = lambda d, suff: (d.physical_id[0],
9869
                                d.physical_id[1] + "_replaced-%s" % suff)

    
9871
      # Build the rename list based on what LVs exist on the node
9872
      rename_old_to_new = []
9873
      for to_ren in old_lvs:
9874
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9875
        if not result.fail_msg and result.payload:
9876
          # device exists
9877
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9878

    
9879
      self.lu.LogInfo("Renaming the old LVs on the target node")
9880
      result = self.rpc.call_blockdev_rename(self.target_node,
9881
                                             rename_old_to_new)
9882
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9883

    
9884
      # Now we rename the new LVs to the old LVs
9885
      self.lu.LogInfo("Renaming the new LVs on the target node")
9886
      rename_new_to_old = [(new, old.physical_id)
9887
                           for old, new in zip(old_lvs, new_lvs)]
9888
      result = self.rpc.call_blockdev_rename(self.target_node,
9889
                                             rename_new_to_old)
9890
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9891

    
9892
      # Intermediate steps of in memory modifications
9893
      for old, new in zip(old_lvs, new_lvs):
9894
        new.logical_id = old.logical_id
9895
        self.cfg.SetDiskID(new, self.target_node)
9896

    
9897
      # We need to modify old_lvs so that removal later removes the
9898
      # right LVs, not the newly added ones; note that old_lvs is a
9899
      # copy here
9900
      for disk in old_lvs:
9901
        disk.logical_id = ren_fn(disk, temp_suffix)
9902
        self.cfg.SetDiskID(disk, self.target_node)
9903

    
9904
      # Now that the new lvs have the old name, we can add them to the device
9905
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9906
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9907
                                                  new_lvs)
9908
      msg = result.fail_msg
9909
      if msg:
9910
        for new_lv in new_lvs:
9911
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9912
                                               new_lv).fail_msg
9913
          if msg2:
9914
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9915
                               hint=("cleanup manually the unused logical"
9916
                                     "volumes"))
9917
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9918

    
9919
    cstep = 5
9920
    if self.early_release:
9921
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9922
      cstep += 1
9923
      self._RemoveOldStorage(self.target_node, iv_names)
9924
      # WARNING: we release both node locks here, do not do other RPCs
9925
      # than WaitForSync to the primary node
9926
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9927
                    names=[self.target_node, self.other_node])
9928

    
9929
    # Wait for sync
9930
    # This can fail as the old devices are degraded and _WaitForSync
9931
    # does a combined result over all disks, so we don't check its return value
9932
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9933
    cstep += 1
9934
    _WaitForSync(self.lu, self.instance)
9935

    
9936
    # Check all devices manually
9937
    self._CheckDevices(self.instance.primary_node, iv_names)
9938

    
9939
    # Step: remove old storage
9940
    if not self.early_release:
9941
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9942
      cstep += 1
9943
      self._RemoveOldStorage(self.target_node, iv_names)
9944

    
9945
  def _ExecDrbd8Secondary(self, feedback_fn):
9946
    """Replace the secondary node for DRBD 8.
9947

9948
    The algorithm for replace is quite complicated:
9949
      - for all disks of the instance:
9950
        - create new LVs on the new node with same names
9951
        - shutdown the drbd device on the old secondary
9952
        - disconnect the drbd network on the primary
9953
        - create the drbd device on the new secondary
9954
        - network attach the drbd on the primary, using an artifice:
9955
          the drbd code for Attach() will connect to the network if it
9956
          finds a device which is connected to the good local disks but
9957
          not network enabled
9958
      - wait for sync across all devices
9959
      - remove all disks from the old secondary
9960

9961
    Failures are not very well handled.
9962

9963
    """
9964
    steps_total = 6
9965

    
9966
    pnode = self.instance.primary_node
9967

    
9968
    # Step: check device activation
9969
    self.lu.LogStep(1, steps_total, "Check device existence")
9970
    self._CheckDisksExistence([self.instance.primary_node])
9971
    self._CheckVolumeGroup([self.instance.primary_node])
9972

    
9973
    # Step: check other node consistency
9974
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9975
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9976

    
9977
    # Step: create new storage
9978
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9979
    for idx, dev in enumerate(self.instance.disks):
9980
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9981
                      (self.new_node, idx))
9982
      # we pass force_create=True to force LVM creation
9983
      for new_lv in dev.children:
9984
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9985
                        _GetInstanceInfoText(self.instance), False)
9986

    
9987
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9991
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9992
                                         for dev in self.instance.disks],
9993
                                        self.instance.name)
9994
    logging.debug("Allocated minors %r", minors)
9995

    
9996
    iv_names = {}
9997
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9998
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9999
                      (self.new_node, idx))
10000
      # create new devices on new_node; note that we create two IDs:
10001
      # one without port, so the drbd will be activated without
10002
      # networking information on the new node at this stage, and one
10003
      # with network, for the latter activation in step 4
10004
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10005
      if self.instance.primary_node == o_node1:
10006
        p_minor = o_minor1
10007
      else:
10008
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10009
        p_minor = o_minor2
10010

    
10011
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10012
                      p_minor, new_minor, o_secret)
10013
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10014
                    p_minor, new_minor, o_secret)
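      # DRBD8 logical_id layout: (nodeA, nodeB, port, minorA, minorB, secret).
      # new_alone_id deliberately carries port=None so the device first comes
      # up without networking, while new_net_id (including the port) is what
      # gets stored in the configuration for the later network attach.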

    
10016
      iv_names[idx] = (dev, dev.children, new_net_id)
10017
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10018
                    new_net_id)
10019
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10020
                              logical_id=new_alone_id,
10021
                              children=dev.children,
10022
                              size=dev.size)
10023
      try:
10024
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10025
                              _GetInstanceInfoText(self.instance), False)
10026
      except errors.GenericError:
10027
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10028
        raise
10029

    
10030
    # We have new devices, shutdown the drbd on the old secondary
10031
    for idx, dev in enumerate(self.instance.disks):
10032
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10033
      self.cfg.SetDiskID(dev, self.target_node)
10034
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10035
      if msg:
10036
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10037
                           "node: %s" % (idx, msg),
10038
                           hint=("Please cleanup this device manually as"
10039
                                 " soon as possible"))
10040

    
10041
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10042
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10043
                                               self.instance.disks)[pnode]
10044

    
10045
    msg = result.fail_msg
10046
    if msg:
10047
      # detaches didn't succeed (unlikely)
10048
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10049
      raise errors.OpExecError("Can't detach the disks from the network on"
10050
                               " old node: %s" % (msg,))
10051

    
10052
    # if we managed to detach at least one, we update all the disks of
10053
    # the instance to point to the new secondary
10054
    self.lu.LogInfo("Updating instance configuration")
10055
    for dev, _, new_logical_id in iv_names.itervalues():
10056
      dev.logical_id = new_logical_id
10057
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10058

    
10059
    self.cfg.Update(self.instance, feedback_fn)
10060

    
10061
    # and now perform the drbd attach
10062
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10063
                    " (standalone => connected)")
10064
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10065
                                            self.new_node],
10066
                                           self.node_secondary_ip,
10067
                                           self.instance.disks,
10068
                                           self.instance.name,
10069
                                           False)
10070
    for to_node, to_result in result.items():
10071
      msg = to_result.fail_msg
10072
      if msg:
10073
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10074
                           to_node, msg,
10075
                           hint=("please do a gnt-instance info to see the"
10076
                                 " status of disks"))
10077
    cstep = 5
10078
    if self.early_release:
10079
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10080
      cstep += 1
10081
      self._RemoveOldStorage(self.target_node, iv_names)
10082
      # WARNING: we release all node locks here, do not do other RPCs
10083
      # than WaitForSync to the primary node
10084
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10085
                    names=[self.instance.primary_node,
10086
                           self.target_node,
10087
                           self.new_node])
10088

    
10089
    # Wait for sync
10090
    # This can fail as the old devices are degraded and _WaitForSync
10091
    # does a combined result over all disks, so we don't check its return value
10092
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10093
    cstep += 1
10094
    _WaitForSync(self.lu, self.instance)
10095

    
10096
    # Check all devices manually
10097
    self._CheckDevices(self.instance.primary_node, iv_names)
10098

    
10099
    # Step: remove old storage
10100
    if not self.early_release:
10101
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10102
      self._RemoveOldStorage(self.target_node, iv_names)


class LURepairNodeStorage(NoHooksLU):
10106
  """Repairs the volume group on a node.
10107

10108
  """
10109
  REQ_BGL = False
10110

    
10111
  def CheckArguments(self):
10112
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10113

    
10114
    storage_type = self.op.storage_type
10115

    
10116
    if (constants.SO_FIX_CONSISTENCY not in
10117
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10118
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10119
                                 " repaired" % storage_type,
10120
                                 errors.ECODE_INVAL)
10121

    
10122
  def ExpandNames(self):
10123
    self.needed_locks = {
10124
      locking.LEVEL_NODE: [self.op.node_name],
10125
      }
10126

    
10127
  def _CheckFaultyDisks(self, instance, node_name):
10128
    """Ensure faulty disks abort the opcode or at least warn."""
10129
    try:
10130
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10131
                                  node_name, True):
10132
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10133
                                   " node '%s'" % (instance.name, node_name),
10134
                                   errors.ECODE_STATE)
10135
    except errors.OpPrereqError, err:
10136
      if self.op.ignore_consistency:
10137
        self.proc.LogWarning(str(err.args[0]))
10138
      else:
10139
        raise
10140

    
10141
  def CheckPrereq(self):
10142
    """Check prerequisites.
10143

10144
    """
10145
    # Check whether any instance on this node has faulty disks
10146
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10147
      if not inst.admin_up:
10148
        continue
10149
      check_nodes = set(inst.all_nodes)
10150
      check_nodes.discard(self.op.node_name)
10151
      for inst_node_name in check_nodes:
10152
        self._CheckFaultyDisks(inst, inst_node_name)
10153

    
10154
  def Exec(self, feedback_fn):
10155
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10156
                (self.op.name, self.op.node_name))
10157

    
10158
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10159
    result = self.rpc.call_storage_execute(self.op.node_name,
10160
                                           self.op.storage_type, st_args,
10161
                                           self.op.name,
10162
                                           constants.SO_FIX_CONSISTENCY)
10163
    result.Raise("Failed to repair storage unit '%s' on %s" %
10164
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
10168
  """Evacuates instances off a list of nodes.
10169

10170
  """
10171
  REQ_BGL = False
10172

    
10173
  _MODE2IALLOCATOR = {
10174
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10175
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10176
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10177
    }
10178
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10179
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
10180
          constants.IALLOCATOR_NEVAC_MODES)
10181

    
10182
  def CheckArguments(self):
10183
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10184

    
10185
  def ExpandNames(self):
10186
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10187

    
10188
    if self.op.remote_node is not None:
10189
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10190
      assert self.op.remote_node
10191

    
10192
      if self.op.remote_node == self.op.node_name:
10193
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10194
                                   " secondary node", errors.ECODE_INVAL)
10195

    
10196
      if self.op.mode != constants.NODE_EVAC_SEC:
10197
        raise errors.OpPrereqError("Without the use of an iallocator only"
10198
                                   " secondary instances can be evacuated",
10199
                                   errors.ECODE_INVAL)
10200

    
10201
    # Declare locks
10202
    self.share_locks = _ShareAll()
10203
    self.needed_locks = {
10204
      locking.LEVEL_INSTANCE: [],
10205
      locking.LEVEL_NODEGROUP: [],
10206
      locking.LEVEL_NODE: [],
10207
      }
10208

    
10209
    # Determine nodes (via group) optimistically, needs verification once locks
10210
    # have been acquired
10211
    self.lock_nodes = self._DetermineNodes()
10212

    
10213
  def _DetermineNodes(self):
10214
    """Gets the list of nodes to operate on.
10215

10216
    """
10217
    if self.op.remote_node is None:
10218
      # Iallocator will choose any node(s) in the same group
10219
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10220
    else:
10221
      group_nodes = frozenset([self.op.remote_node])
10222

    
10223
    # Determine nodes to be locked
10224
    return set([self.op.node_name]) | group_nodes
10225

    
10226
  def _DetermineInstances(self):
10227
    """Builds list of instances to operate on.
10228

10229
    """
10230
    assert self.op.mode in constants.NODE_EVAC_MODES
10231

    
10232
    if self.op.mode == constants.NODE_EVAC_PRI:
10233
      # Primary instances only
10234
      inst_fn = _GetNodePrimaryInstances
10235
      assert self.op.remote_node is None, \
10236
        "Evacuating primary instances requires iallocator"
10237
    elif self.op.mode == constants.NODE_EVAC_SEC:
10238
      # Secondary instances only
10239
      inst_fn = _GetNodeSecondaryInstances
10240
    else:
10241
      # All instances
10242
      assert self.op.mode == constants.NODE_EVAC_ALL
10243
      inst_fn = _GetNodeInstances
10244
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10245
      # per instance
10246
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10247
                                 " interface it is not possible to evacuate"
10248
                                 " all instances at once; specify explicitly"
10249
                                 " whether to evacuate primary or secondary"
10250
                                 " instances",
10251
                                 errors.ECODE_INVAL)
10252

    
10253
    return inst_fn(self.cfg, self.op.node_name)
10254

    
10255
  def DeclareLocks(self, level):
10256
    if level == locking.LEVEL_INSTANCE:
10257
      # Lock instances optimistically, needs verification once node and group
10258
      # locks have been acquired
10259
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10260
        set(i.name for i in self._DetermineInstances())
10261

    
10262
    elif level == locking.LEVEL_NODEGROUP:
10263
      # Lock node groups for all potential target nodes optimistically, needs
10264
      # verification once nodes have been acquired
10265
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10266
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10267

    
10268
    elif level == locking.LEVEL_NODE:
10269
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10270

    
10271
  def CheckPrereq(self):
10272
    # Verify locks
10273
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10274
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10275
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10276

    
10277
    need_nodes = self._DetermineNodes()
10278

    
10279
    if not owned_nodes.issuperset(need_nodes):
10280
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10281
                                 " locks were acquired, current nodes are"
10282
                                 " are '%s', used to be '%s'; retry the"
10283
                                 " operation" %
10284
                                 (self.op.node_name,
10285
                                  utils.CommaJoin(need_nodes),
10286
                                  utils.CommaJoin(owned_nodes)),
10287
                                 errors.ECODE_STATE)
10288

    
10289
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10290
    if owned_groups != wanted_groups:
10291
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10292
                               " current groups are '%s', used to be '%s';"
10293
                               " retry the operation" %
10294
                               (utils.CommaJoin(wanted_groups),
10295
                                utils.CommaJoin(owned_groups)))
10296

    
10297
    # Determine affected instances
10298
    self.instances = self._DetermineInstances()
10299
    self.instance_names = [i.name for i in self.instances]
10300

    
10301
    if set(self.instance_names) != owned_instances:
10302
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10303
                               " were acquired, current instances are '%s',"
10304
                               " used to be '%s'; retry the operation" %
10305
                               (self.op.node_name,
10306
                                utils.CommaJoin(self.instance_names),
10307
                                utils.CommaJoin(owned_instances)))
10308

    
10309
    if self.instance_names:
10310
      self.LogInfo("Evacuating instances from node '%s': %s",
10311
                   self.op.node_name,
10312
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10313
    else:
10314
      self.LogInfo("No instances to evacuate from node '%s'",
10315
                   self.op.node_name)
10316

    
10317
    if self.op.remote_node is not None:
10318
      for i in self.instances:
10319
        if i.primary_node == self.op.remote_node:
10320
          raise errors.OpPrereqError("Node %s is the primary node of"
10321
                                     " instance %s, cannot use it as"
10322
                                     " secondary" %
10323
                                     (self.op.remote_node, i.name),
10324
                                     errors.ECODE_INVAL)
10325

    
10326
  def Exec(self, feedback_fn):
10327
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10328

    
10329
    if not self.instance_names:
10330
      # No instances to evacuate
10331
      jobs = []
10332

    
10333
    elif self.op.iallocator is not None:
10334
      # TODO: Implement relocation to other group
10335
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10336
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10337
                       instances=list(self.instance_names))
10338

    
10339
      ial.Run(self.op.iallocator)
10340

    
10341
      if not ial.success:
10342
        raise errors.OpPrereqError("Can't compute node evacuation using"
10343
                                   " iallocator '%s': %s" %
10344
                                   (self.op.iallocator, ial.info),
10345
                                   errors.ECODE_NORES)
10346

    
10347
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10348

    
10349
    elif self.op.remote_node is not None:
10350
      assert self.op.mode == constants.NODE_EVAC_SEC
10351
      jobs = [
10352
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10353
                                        remote_node=self.op.remote_node,
10354
                                        disks=[],
10355
                                        mode=constants.REPLACE_DISK_CHG,
10356
                                        early_release=self.op.early_release)]
10357
        for instance_name in self.instance_names
10358
        ]
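      # One single-opcode job per instance, so each replace-disks operation is
      # submitted (and can fail) independently of the others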

    
10360
    else:
10361
      raise errors.ProgrammerError("No iallocator or remote node")
10362

    
10363
    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
10367
  """Sets C{early_release} flag on opcodes if available.
10368

10369
  """
10370
  try:
10371
    op.early_release = early_release
10372
  except AttributeError:
10373
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
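    # Opcodes lacking an early_release slot are passed through unchanged;
    # OpInstanceReplaceDisks always has it, which is what the assert above
    # encodes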

    
10375
  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10389
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10390

10391
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10392
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10393

10394
  @type lu: L{LogicalUnit}
10395
  @param lu: Logical unit instance
10396
  @type alloc_result: tuple/list
10397
  @param alloc_result: Result from iallocator
10398
  @type early_release: bool
10399
  @param early_release: Whether to release locks early if possible
10400
  @type use_nodes: bool
10401
  @param use_nodes: Whether to display node names instead of groups
10402

10403
  """
10404
  (moved, failed, jobs) = alloc_result
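  # "moved" is a list of (instance, group, nodes) tuples, "failed" a list of
  # (instance, reason) tuples and "jobs" a list of per-job opcode lists in
  # serialized form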

    
10406
  if failed:
10407
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10408
                                 for (name, reason) in failed)
10409
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10410
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10411

    
10412
  if moved:
10413
    lu.LogInfo("Instances to be moved: %s",
10414
               utils.CommaJoin("%s (to %s)" %
10415
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10416
                               for (name, group, nodes) in moved))
10417

    
10418
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10419
              map(opcodes.OpCode.LoadOpCode, ops))
10420
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
10424
  """Grow a disk of an instance.
10425

10426
  """
10427
  HPATH = "disk-grow"
10428
  HTYPE = constants.HTYPE_INSTANCE
10429
  REQ_BGL = False
10430

    
10431
  def ExpandNames(self):
10432
    self._ExpandAndLockInstance()
10433
    self.needed_locks[locking.LEVEL_NODE] = []
10434
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10435

    
10436
  def DeclareLocks(self, level):
10437
    if level == locking.LEVEL_NODE:
10438
      self._LockInstancesNodes()
10439

    
10440
  def BuildHooksEnv(self):
10441
    """Build hooks env.
10442

10443
    This runs on the master, the primary and all the secondaries.
10444

10445
    """
10446
    env = {
10447
      "DISK": self.op.disk,
10448
      "AMOUNT": self.op.amount,
10449
      }
10450
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10451
    return env
10452

    
10453
  def BuildHooksNodes(self):
10454
    """Build hooks nodes.
10455

10456
    """
10457
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10458
    return (nl, nl)
10459

    
10460
  def CheckPrereq(self):
10461
    """Check prerequisites.
10462

10463
    This checks that the instance is in the cluster.
10464

10465
    """
10466
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10467
    assert instance is not None, \
10468
      "Cannot retrieve locked instance %s" % self.op.instance_name
10469
    nodenames = list(instance.all_nodes)
10470
    for node in nodenames:
10471
      _CheckNodeOnline(self, node)
10472

    
10473
    self.instance = instance
10474

    
10475
    if instance.disk_template not in constants.DTS_GROWABLE:
10476
      raise errors.OpPrereqError("Instance's disk layout does not support"
10477
                                 " growing", errors.ECODE_INVAL)
10478

    
10479
    self.disk = instance.FindDisk(self.op.disk)
10480

    
10481
    if instance.disk_template not in (constants.DT_FILE,
10482
                                      constants.DT_SHARED_FILE):
10483
      # TODO: check the free disk space for file, when that feature will be
10484
      # supported
10485
      _CheckNodesFreeDiskPerVG(self, nodenames,
10486
                               self.disk.ComputeGrowth(self.op.amount))
10487

    
10488
  def Exec(self, feedback_fn):
10489
    """Execute disk grow.
10490

10491
    """
10492
    instance = self.instance
10493
    disk = self.disk
10494

    
10495
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10496
    if not disks_ok:
10497
      raise errors.OpExecError("Cannot activate block device to grow")
10498

    
10499
    # First run all grow ops in dry-run mode
10500
    for node in instance.all_nodes:
10501
      self.cfg.SetDiskID(disk, node)
10502
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10503
      result.Raise("Grow request failed to node %s" % node)
10504

    
10505
    # We know that (as far as we can test) operations across different
10506
    # nodes will succeed, time to run it for real
10507
    for node in instance.all_nodes:
10508
      self.cfg.SetDiskID(disk, node)
10509
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10510
      result.Raise("Grow request failed to node %s" % node)
10511

    
10512
      # TODO: Rewrite code to work properly
10513
      # DRBD goes into sync mode for a short amount of time after executing the
10514
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10515
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10516
      # time is a work-around.
10517
      time.sleep(5)
10518

    
10519
    disk.RecordGrow(self.op.amount)
10520
    self.cfg.Update(instance, feedback_fn)
10521
    if self.op.wait_for_sync:
10522
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10523
      if disk_abort:
10524
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10525
                             " status; please check the instance")
10526
      if not instance.admin_up:
10527
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10528
    elif not instance.admin_up:
10529
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10530
                           " not supposed to be running because no wait for"
10531
                           " sync mode was requested")


class LUInstanceQueryData(NoHooksLU):
10535
  """Query runtime instance data.
10536

10537
  """
10538
  REQ_BGL = False
10539

    
10540
  def ExpandNames(self):
10541
    self.needed_locks = {}
10542

    
10543
    # Use locking if requested or when non-static information is wanted
10544
    if not (self.op.static or self.op.use_locking):
10545
      self.LogWarning("Non-static data requested, locks need to be acquired")
10546
      self.op.use_locking = True
10547

    
10548
    if self.op.instances or not self.op.use_locking:
10549
      # Expand instance names right here
10550
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10551
    else:
10552
      # Will use acquired locks
10553
      self.wanted_names = None
10554

    
10555
    if self.op.use_locking:
10556
      self.share_locks = _ShareAll()
10557

    
10558
      if self.wanted_names is None:
10559
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10560
      else:
10561
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10562

    
10563
      self.needed_locks[locking.LEVEL_NODEGROUP] = []
10564
      self.needed_locks[locking.LEVEL_NODE] = []
10565
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10566

    
10567
  def DeclareLocks(self, level):
10568
    if self.op.use_locking:
10569
      if level == locking.LEVEL_NODEGROUP:
10570
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10571

    
10572
        # Lock all groups used by instances optimistically; this requires going
10573
        # via the node before it's locked, requiring verification later on
10574
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
10575
          frozenset(group_uuid
10576
                    for instance_name in owned_instances
10577
                    for group_uuid in
10578
                      self.cfg.GetInstanceNodeGroups(instance_name))
10579

    
10580
      elif level == locking.LEVEL_NODE:
10581
        self._LockInstancesNodes()
10582

    
10583
  def CheckPrereq(self):
10584
    """Check prerequisites.
10585

10586
    This only checks the optional instance list against the existing names.
10587

10588
    """
10589
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
10590
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
10591
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10592

    
10593
    if self.wanted_names is None:
10594
      assert self.op.use_locking, "Locking was not used"
10595
      self.wanted_names = owned_instances
10596

    
10597
    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
10598

    
10599
    if self.op.use_locking:
10600
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
10601
                                None)
10602
    else:
10603
      assert not (owned_instances or owned_groups or owned_nodes)
10604

    
10605
    self.wanted_instances = instances.values()
10606

    
10607
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10608
    """Returns the status of a block device
10609

10610
    """
10611
    if self.op.static or not node:
10612
      return None
10613

    
10614
    self.cfg.SetDiskID(dev, node)
10615

    
10616
    result = self.rpc.call_blockdev_find(node, dev)
10617
    if result.offline:
10618
      return None
10619

    
10620
    result.Raise("Can't compute disk status for %s" % instance_name)
10621

    
10622
    status = result.payload
10623
    if status is None:
10624
      return None
10625

    
10626
    return (status.dev_path, status.major, status.minor,
10627
            status.sync_percent, status.estimated_time,
10628
            status.is_degraded, status.ldisk_status)
10629

    
10630
  def _ComputeDiskStatus(self, instance, snode, dev):
10631
    """Compute block device status.
10632

10633
    """
10634
    if dev.dev_type in constants.LDS_DRBD:
10635
      # we change the snode then (otherwise we use the one passed in)
10636
      if dev.logical_id[0] == instance.primary_node:
10637
        snode = dev.logical_id[1]
10638
      else:
10639
        snode = dev.logical_id[0]
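      # i.e. for DRBD the "secondary" reported is whichever of the two
      # logical_id nodes is not the instance's primary node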

    
10641
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10642
                                              instance.name, dev)
10643
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10644

    
10645
    if dev.children:
10646
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10647
                                        instance, snode),
10648
                         dev.children)
10649
    else:
10650
      dev_children = []
10651

    
10652
    return {
10653
      "iv_name": dev.iv_name,
10654
      "dev_type": dev.dev_type,
10655
      "logical_id": dev.logical_id,
10656
      "physical_id": dev.physical_id,
10657
      "pstatus": dev_pstatus,
10658
      "sstatus": dev_sstatus,
10659
      "children": dev_children,
10660
      "mode": dev.mode,
10661
      "size": dev.size,
10662
      }
10663

    
10664
  def Exec(self, feedback_fn):
10665
    """Gather and return data"""
10666
    result = {}
10667

    
10668
    cluster = self.cfg.GetClusterInfo()
10669

    
10670
    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
10671
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
10672

    
10673
    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
10674
                                                 for node in nodes.values()))
10675

    
10676
    group2name_fn = lambda uuid: groups[uuid].name
10677

    
10678
    for instance in self.wanted_instances:
10679
      pnode = nodes[instance.primary_node]
10680

    
10681
      if self.op.static or pnode.offline:
10682
        remote_state = None
10683
        if pnode.offline:
10684
          self.LogWarning("Primary node %s is marked offline, returning static"
10685
                          " information only for instance %s" %
10686
                          (pnode.name, instance.name))
10687
      else:
10688
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10689
                                                  instance.name,
10690
                                                  instance.hypervisor)
10691
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10692
        remote_info = remote_info.payload
10693
        if remote_info and "state" in remote_info:
10694
          remote_state = "up"
10695
        else:
10696
          remote_state = "down"
10697

    
10698
      if instance.admin_up:
10699
        config_state = "up"
10700
      else:
10701
        config_state = "down"
10702

    
10703
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10704
                  instance.disks)
10705

    
10706
      snodes_group_uuids = [nodes[snode_name].group
10707
                            for snode_name in instance.secondary_nodes]
10708

    
10709
      result[instance.name] = {
10710
        "name": instance.name,
10711
        "config_state": config_state,
10712
        "run_state": remote_state,
10713
        "pnode": instance.primary_node,
10714
        "pnode_group_uuid": pnode.group,
10715
        "pnode_group_name": group2name_fn(pnode.group),
10716
        "snodes": instance.secondary_nodes,
10717
        "snodes_group_uuids": snodes_group_uuids,
10718
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
10719
        "os": instance.os,
10720
        # this happens to be the same format used for hooks
10721
        "nics": _NICListToTuple(self, instance.nics),
10722
        "disk_template": instance.disk_template,
10723
        "disks": disks,
10724
        "hypervisor": instance.hypervisor,
10725
        "network_port": instance.network_port,
10726
        "hv_instance": instance.hvparams,
10727
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10728
        "be_instance": instance.beparams,
10729
        "be_actual": cluster.FillBE(instance),
10730
        "os_instance": instance.osparams,
10731
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10732
        "serial_no": instance.serial_no,
10733
        "mtime": instance.mtime,
10734
        "ctime": instance.ctime,
10735
        "uuid": instance.uuid,
10736
        }
10737

    
10738
    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

10744
  """
10745
  HPATH = "instance-modify"
10746
  HTYPE = constants.HTYPE_INSTANCE
10747
  REQ_BGL = False
10748

    
10749
  def CheckArguments(self):
10750
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10751
            self.op.hvparams or self.op.beparams or self.op.os_name):
10752
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10753

    
10754
    if self.op.hvparams:
10755
      _CheckGlobalHvParams(self.op.hvparams)
10756

    
10757
    # Disk validation
10758
    disk_addremove = 0
10759
    for disk_op, disk_dict in self.op.disks:
10760
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10761
      if disk_op == constants.DDM_REMOVE:
10762
        disk_addremove += 1
10763
        continue
10764
      elif disk_op == constants.DDM_ADD:
10765
        disk_addremove += 1
10766
      else:
10767
        if not isinstance(disk_op, int):
10768
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10769
        if not isinstance(disk_dict, dict):
10770
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10771
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10772

    
10773
      if disk_op == constants.DDM_ADD:
10774
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10775
        if mode not in constants.DISK_ACCESS_SET:
10776
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10777
                                     errors.ECODE_INVAL)
10778
        size = disk_dict.get(constants.IDISK_SIZE, None)
10779
        if size is None:
10780
          raise errors.OpPrereqError("Required disk parameter size missing",
10781
                                     errors.ECODE_INVAL)
10782
        try:
10783
          size = int(size)
10784
        except (TypeError, ValueError), err:
10785
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10786
                                     str(err), errors.ECODE_INVAL)
10787
        disk_dict[constants.IDISK_SIZE] = size
10788
      else:
10789
        # modification of disk
10790
        if constants.IDISK_SIZE in disk_dict:
10791
          raise errors.OpPrereqError("Disk size change not possible, use"
10792
                                     " grow-disk", errors.ECODE_INVAL)
10793

    
10794
    if disk_addremove > 1:
10795
      raise errors.OpPrereqError("Only one disk add or remove operation"
10796
                                 " supported at a time", errors.ECODE_INVAL)
10797

    
10798
    if self.op.disks and self.op.disk_template is not None:
10799
      raise errors.OpPrereqError("Disk template conversion and other disk"
10800
                                 " changes not supported at the same time",
10801
                                 errors.ECODE_INVAL)
10802

    
10803
    if (self.op.disk_template and
10804
        self.op.disk_template in constants.DTS_INT_MIRROR and
10805
        self.op.remote_node is None):
10806
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10807
                                 " one requires specifying a secondary node",
10808
                                 errors.ECODE_INVAL)
10809

    
10810
    # NIC validation
10811
    nic_addremove = 0
10812
    for nic_op, nic_dict in self.op.nics:
10813
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10814
      if nic_op == constants.DDM_REMOVE:
10815
        nic_addremove += 1
10816
        continue
10817
      elif nic_op == constants.DDM_ADD:
10818
        nic_addremove += 1
10819
      else:
10820
        if not isinstance(nic_op, int):
10821
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10822
        if not isinstance(nic_dict, dict):
10823
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10824
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10825

    
10826
      # nic_dict should be a dict
10827
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10828
      if nic_ip is not None:
10829
        if nic_ip.lower() == constants.VALUE_NONE:
10830
          nic_dict[constants.INIC_IP] = None
10831
        else:
10832
          if not netutils.IPAddress.IsValid(nic_ip):
10833
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10834
                                       errors.ECODE_INVAL)
10835

    
10836
      nic_bridge = nic_dict.get("bridge", None)
10837
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10838
      if nic_bridge and nic_link:
10839
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10840
                                   " at the same time", errors.ECODE_INVAL)
10841
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10842
        nic_dict["bridge"] = None
10843
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10844
        nic_dict[constants.INIC_LINK] = None
10845

    
10846
      if nic_op == constants.DDM_ADD:
10847
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10848
        if nic_mac is None:
10849
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10850

    
10851
      if constants.INIC_MAC in nic_dict:
10852
        nic_mac = nic_dict[constants.INIC_MAC]
10853
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10854
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10855

    
10856
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10857
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10858
                                     " modifying an existing nic",
10859
                                     errors.ECODE_INVAL)
10860

    
10861
    if nic_addremove > 1:
10862
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10863
                                 " supported at a time", errors.ECODE_INVAL)
10864

    
10865
  def ExpandNames(self):
10866
    self._ExpandAndLockInstance()
10867
    self.needed_locks[locking.LEVEL_NODE] = []
10868
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10869

    
10870
  def DeclareLocks(self, level):
10871
    if level == locking.LEVEL_NODE:
10872
      self._LockInstancesNodes()
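      # A conversion to a mirrored disk template also needs the new
      # secondary node locked, in addition to the instance's own nodes.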
10873
      if self.op.disk_template and self.op.remote_node:
10874
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10875
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10876

    
10877
  def BuildHooksEnv(self):
10878
    """Build hooks env.
10879

10880
    This runs on the master, primary and secondaries.
10881

10882
    """
10883
    args = dict()
10884
    if constants.BE_MEMORY in self.be_new:
10885
      args["memory"] = self.be_new[constants.BE_MEMORY]
10886
    if constants.BE_VCPUS in self.be_new:
10887
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10888
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10889
    # information at all.
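    # Build the NIC list as it will look after this modification: start from
    # the instance's current NICs, overlay any per-index changes requested in
    # the opcode, then append (DDM_ADD) or drop (DDM_REMOVE) the last entry.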
10890
    if self.op.nics:
10891
      args["nics"] = []
10892
      nic_override = dict(self.op.nics)
10893
      for idx, nic in enumerate(self.instance.nics):
10894
        if idx in nic_override:
10895
          this_nic_override = nic_override[idx]
10896
        else:
10897
          this_nic_override = {}
10898
        if constants.INIC_IP in this_nic_override:
10899
          ip = this_nic_override[constants.INIC_IP]
10900
        else:
10901
          ip = nic.ip
10902
        if constants.INIC_MAC in this_nic_override:
10903
          mac = this_nic_override[constants.INIC_MAC]
10904
        else:
10905
          mac = nic.mac
10906
        if idx in self.nic_pnew:
10907
          nicparams = self.nic_pnew[idx]
10908
        else:
10909
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10910
        mode = nicparams[constants.NIC_MODE]
10911
        link = nicparams[constants.NIC_LINK]
10912
        args["nics"].append((ip, mac, mode, link))
10913
      if constants.DDM_ADD in nic_override:
10914
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10915
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10916
        nicparams = self.nic_pnew[constants.DDM_ADD]
10917
        mode = nicparams[constants.NIC_MODE]
10918
        link = nicparams[constants.NIC_LINK]
10919
        args["nics"].append((ip, mac, mode, link))
10920
      elif constants.DDM_REMOVE in nic_override:
10921
        del args["nics"][-1]
10922

    
10923
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10924
    if self.op.disk_template:
10925
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10926

    
10927
    return env
10928

    
10929
  def BuildHooksNodes(self):
10930
    """Build hooks nodes.
10931

10932
    """
10933
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10934
    return (nl, nl)
10935

    
10936
  def CheckPrereq(self):
10937
    """Check prerequisites.
10938

10939
    This checks the requested parameter changes against the instance's
    current configuration and the state of its nodes.
10940

10941
    """
10942
    # checking the new params on the primary/secondary nodes
10943

    
10944
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10945
    cluster = self.cluster = self.cfg.GetClusterInfo()
10946
    assert self.instance is not None, \
10947
      "Cannot retrieve locked instance %s" % self.op.instance_name
10948
    pnode = instance.primary_node
10949
    nodelist = list(instance.all_nodes)
10950

    
10951
    # OS change
10952
    if self.op.os_name and not self.op.force:
10953
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10954
                      self.op.force_variant)
10955
      instance_os = self.op.os_name
10956
    else:
10957
      instance_os = instance.os
10958

    
10959
    if self.op.disk_template:
10960
      if instance.disk_template == self.op.disk_template:
10961
        raise errors.OpPrereqError("Instance already has disk template %s" %
10962
                                   instance.disk_template, errors.ECODE_INVAL)
10963

    
10964
      if (instance.disk_template,
10965
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10966
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10967
                                   " %s to %s" % (instance.disk_template,
10968
                                                  self.op.disk_template),
10969
                                   errors.ECODE_INVAL)
10970
      _CheckInstanceDown(self, instance, "cannot change disk template")
10971
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10972
        if self.op.remote_node == pnode:
10973
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10974
                                     " as the primary node of the instance" %
10975
                                     self.op.remote_node, errors.ECODE_STATE)
10976
        _CheckNodeOnline(self, self.op.remote_node)
10977
        _CheckNodeNotDrained(self, self.op.remote_node)
10978
        # FIXME: here we assume that the old disk template is DT_PLAIN
10979
        assert instance.disk_template == constants.DT_PLAIN
10980
        disks = [{constants.IDISK_SIZE: d.size,
10981
                  constants.IDISK_VG: d.logical_id[0]}
10982
                 for d in instance.disks]
10983
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10984
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10985

    
10986
    # hvparams processing
10987
    if self.op.hvparams:
10988
      hv_type = instance.hypervisor
10989
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10990
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10991
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10992

    
10993
      # local check
10994
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10995
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10996
      self.hv_new = hv_new # the new actual values
10997
      self.hv_inst = i_hvdict # the new dict (without defaults)
10998
    else:
10999
      self.hv_new = self.hv_inst = {}
11000

    
11001
    # beparams processing
11002
    if self.op.beparams:
11003
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11004
                                   use_none=True)
11005
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11006
      be_new = cluster.SimpleFillBE(i_bedict)
11007
      self.be_new = be_new # the new actual values
11008
      self.be_inst = i_bedict # the new dict (without defaults)
11009
    else:
11010
      self.be_new = self.be_inst = {}
11011
    be_old = cluster.FillBE(instance)
11012

    
11013
    # osparams processing
11014
    if self.op.osparams:
11015
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11016
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11017
      self.os_inst = i_osdict # the new dict (without defaults)
11018
    else:
11019
      self.os_inst = {}
11020

    
11021
    self.warn = []
11022

    
11023
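    # If memory is being increased without --force, verify that the primary
    # node can still fit the instance with its new size and, when
    # auto_balance is set, that the secondary nodes could host it as well.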
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11024
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11025
      mem_check_list = [pnode]
11026
      if be_new[constants.BE_AUTO_BALANCE]:
11027
        # either we changed auto_balance to yes or it was from before
11028
        mem_check_list.extend(instance.secondary_nodes)
11029
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11030
                                                  instance.hypervisor)
11031
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11032
                                         instance.hypervisor)
11033
      pninfo = nodeinfo[pnode]
11034
      msg = pninfo.fail_msg
11035
      if msg:
11036
        # Assume the primary node is unreachable and go ahead
11037
        self.warn.append("Can't get info from primary node %s: %s" %
11038
                         (pnode, msg))
11039
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11040
        self.warn.append("Node data from primary node %s doesn't contain"
11041
                         " free memory information" % pnode)
11042
      elif instance_info.fail_msg:
11043
        self.warn.append("Can't get instance runtime information: %s" %
11044
                        instance_info.fail_msg)
11045
      else:
11046
        if instance_info.payload:
11047
          current_mem = int(instance_info.payload["memory"])
11048
        else:
11049
          # Assume instance not running
11050
          # (there is a slight race condition here, but it's not very probable,
11051
          # and we have no other way to check)
11052
          current_mem = 0
11053
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11054
                    pninfo.payload["memory_free"])
11055
        if miss_mem > 0:
11056
          raise errors.OpPrereqError("This change will prevent the instance"
11057
                                     " from starting, due to %d MB of memory"
11058
                                     " missing on its primary node" % miss_mem,
11059
                                     errors.ECODE_NORES)
11060

    
11061
      if be_new[constants.BE_AUTO_BALANCE]:
11062
        for node, nres in nodeinfo.items():
11063
          if node not in instance.secondary_nodes:
11064
            continue
11065
          nres.Raise("Can't get info from secondary node %s" % node,
11066
                     prereq=True, ecode=errors.ECODE_STATE)
11067
          if not isinstance(nres.payload.get("memory_free", None), int):
11068
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11069
                                       " memory information" % node,
11070
                                       errors.ECODE_STATE)
11071
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11072
            raise errors.OpPrereqError("This change will prevent the instance"
11073
                                       " from failover to its secondary node"
11074
                                       " %s, due to not enough memory" % node,
11075
                                       errors.ECODE_STATE)
11076

    
11077
    # NIC processing
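    # For each requested NIC change compute two parameter dicts: nic_pinst
    # holds the values that will be stored on the instance (no cluster
    # defaults), nic_pnew the fully filled version used by the checks below.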
11078
    self.nic_pnew = {}
11079
    self.nic_pinst = {}
11080
    for nic_op, nic_dict in self.op.nics:
11081
      if nic_op == constants.DDM_REMOVE:
11082
        if not instance.nics:
11083
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11084
                                     errors.ECODE_INVAL)
11085
        continue
11086
      if nic_op != constants.DDM_ADD:
11087
        # an existing nic
11088
        if not instance.nics:
11089
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11090
                                     " no NICs" % nic_op,
11091
                                     errors.ECODE_INVAL)
11092
        if nic_op < 0 or nic_op >= len(instance.nics):
11093
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11094
                                     " are 0 to %d" %
11095
                                     (nic_op, len(instance.nics) - 1),
11096
                                     errors.ECODE_INVAL)
11097
        old_nic_params = instance.nics[nic_op].nicparams
11098
        old_nic_ip = instance.nics[nic_op].ip
11099
      else:
11100
        old_nic_params = {}
11101
        old_nic_ip = None
11102

    
11103
      update_params_dict = dict([(key, nic_dict[key])
11104
                                 for key in constants.NICS_PARAMETERS
11105
                                 if key in nic_dict])
11106

    
11107
      if "bridge" in nic_dict:
11108
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11109

    
11110
      new_nic_params = _GetUpdatedParams(old_nic_params,
11111
                                         update_params_dict)
11112
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11113
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11114
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11115
      self.nic_pinst[nic_op] = new_nic_params
11116
      self.nic_pnew[nic_op] = new_filled_nic_params
11117
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11118

    
11119
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11120
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11121
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11122
        if msg:
11123
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11124
          if self.op.force:
11125
            self.warn.append(msg)
11126
          else:
11127
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11128
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11129
        if constants.INIC_IP in nic_dict:
11130
          nic_ip = nic_dict[constants.INIC_IP]
11131
        else:
11132
          nic_ip = old_nic_ip
11133
        if nic_ip is None:
11134
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11135
                                     " on a routed nic", errors.ECODE_INVAL)
11136
      if constants.INIC_MAC in nic_dict:
11137
        nic_mac = nic_dict[constants.INIC_MAC]
11138
        if nic_mac is None:
11139
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11140
                                     errors.ECODE_INVAL)
11141
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11142
          # otherwise generate the mac
11143
          nic_dict[constants.INIC_MAC] = \
11144
            self.cfg.GenerateMAC(self.proc.GetECId())
11145
        else:
11146
          # or validate/reserve the current one
11147
          try:
11148
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11149
          except errors.ReservationError:
11150
            raise errors.OpPrereqError("MAC address %s already in use"
11151
                                       " in cluster" % nic_mac,
11152
                                       errors.ECODE_NOTUNIQUE)
11153

    
11154
    # DISK processing
11155
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11156
      raise errors.OpPrereqError("Disk operations not supported for"
11157
                                 " diskless instances",
11158
                                 errors.ECODE_INVAL)
11159
    for disk_op, _ in self.op.disks:
11160
      if disk_op == constants.DDM_REMOVE:
11161
        if len(instance.disks) == 1:
11162
          raise errors.OpPrereqError("Cannot remove the last disk of"
11163
                                     " an instance", errors.ECODE_INVAL)
11164
        _CheckInstanceDown(self, instance, "cannot remove disks")
11165

    
11166
      if (disk_op == constants.DDM_ADD and
11167
          len(instance.disks) >= constants.MAX_DISKS):
11168
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11169
                                   " add more" % constants.MAX_DISKS,
11170
                                   errors.ECODE_STATE)
11171
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11172
        # an existing disk
11173
        if disk_op < 0 or disk_op >= len(instance.disks):
11174
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11175
                                     " are 0 to %d" %
11176
                                     (disk_op, len(instance.disks) - 1),
11177
                                     errors.ECODE_INVAL)
11178

    
11179
    return
11180

    
11181
  def _ConvertPlainToDrbd(self, feedback_fn):
11182
    """Converts an instance from plain to drbd.
11183

11184
    """
11185
    feedback_fn("Converting template to drbd")
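    # Outline of the conversion: generate a DRBD disk layout matching the
    # current LVs, create the missing meta and secondary-node volumes, rename
    # the existing LVs into the new data volumes, assemble the DRBD devices
    # on both nodes and finally wait for the initial sync.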
11186
    instance = self.instance
11187
    pnode = instance.primary_node
11188
    snode = self.op.remote_node
11189

    
11190
    # create a fake disk info for _GenerateDiskTemplate
11191
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11192
                  constants.IDISK_VG: d.logical_id[0]}
11193
                 for d in instance.disks]
11194
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11195
                                      instance.name, pnode, [snode],
11196
                                      disk_info, None, None, 0, feedback_fn)
11197
    info = _GetInstanceInfoText(instance)
11198
    feedback_fn("Creating additional volumes...")
11199
    # first, create the missing data and meta devices
11200
    for disk in new_disks:
11201
      # unfortunately this is... not too nice
11202
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11203
                            info, True)
11204
      for child in disk.children:
11205
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11206
    # at this stage, all new LVs have been created, so we can rename the
11207
    # old ones
11208
    feedback_fn("Renaming original volumes...")
11209
    rename_list = [(o, n.children[0].logical_id)
11210
                   for (o, n) in zip(instance.disks, new_disks)]
11211
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11212
    result.Raise("Failed to rename original LVs")
11213

    
11214
    feedback_fn("Initializing DRBD devices...")
11215
    # all child devices are in place, we can now create the DRBD devices
11216
    for disk in new_disks:
11217
      for node in [pnode, snode]:
11218
        f_create = node == pnode
11219
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11220

    
11221
    # at this point, the instance has been modified
11222
    instance.disk_template = constants.DT_DRBD8
11223
    instance.disks = new_disks
11224
    self.cfg.Update(instance, feedback_fn)
11225

    
11226
    # disks are created, waiting for sync
11227
    disk_abort = not _WaitForSync(self, instance,
11228
                                  oneshot=not self.op.wait_for_sync)
11229
    if disk_abort:
11230
      raise errors.OpExecError("There are some degraded disks for"
11231
                               " this instance, please cleanup manually")
11232

    
11233
  def _ConvertDrbdToPlain(self, feedback_fn):
11234
    """Converts an instance from drbd to plain.
11235

11236
    """
11237
    instance = self.instance
11238
    assert len(instance.secondary_nodes) == 1
11239
    pnode = instance.primary_node
11240
    snode = instance.secondary_nodes[0]
11241
    feedback_fn("Converting template to plain")
11242

    
11243
    old_disks = instance.disks
11244
    new_disks = [d.children[0] for d in old_disks]
11245

    
11246
    # copy over size and mode
11247
    for parent, child in zip(old_disks, new_disks):
11248
      child.size = parent.size
11249
      child.mode = parent.mode
11250

    
11251
    # update instance structure
11252
    instance.disks = new_disks
11253
    instance.disk_template = constants.DT_PLAIN
11254
    self.cfg.Update(instance, feedback_fn)
11255

    
11256
    feedback_fn("Removing volumes on the secondary node...")
11257
    for disk in old_disks:
11258
      self.cfg.SetDiskID(disk, snode)
11259
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11260
      if msg:
11261
        self.LogWarning("Could not remove block device %s on node %s,"
11262
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11263

    
11264
    feedback_fn("Removing unneeded volumes on the primary node...")
11265
    for idx, disk in enumerate(old_disks):
11266
      meta = disk.children[1]
11267
      self.cfg.SetDiskID(meta, pnode)
11268
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11269
      if msg:
11270
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11271
                        " continuing anyway: %s", idx, pnode, msg)
11272

    
11273
    # this is a DRBD disk, return its port to the pool
11274
    for disk in old_disks:
11275
      tcp_port = disk.logical_id[2]
11276
      self.cfg.AddTcpUdpPort(tcp_port)
11277

    
11278
  def Exec(self, feedback_fn):
11279
    """Modifies an instance.
11280

11281
    All parameters take effect only at the next restart of the instance.
11282

11283
    """
11284
    # Process the warnings from CheckPrereq here, as we don't have a
11285
    # feedback_fn there.
11286
    for warn in self.warn:
11287
      feedback_fn("WARNING: %s" % warn)
11288

    
11289
    result = []
11290
    instance = self.instance
11291
    # disk changes
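    # DDM_REMOVE always drops the last disk, DDM_ADD generates and creates
    # the new block devices on all of the instance's nodes, and an index
    # entry only changes the access mode of the corresponding disk.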
11292
    for disk_op, disk_dict in self.op.disks:
11293
      if disk_op == constants.DDM_REMOVE:
11294
        # remove the last disk
11295
        device = instance.disks.pop()
11296
        device_idx = len(instance.disks)
11297
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11298
          self.cfg.SetDiskID(disk, node)
11299
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11300
          if msg:
11301
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11302
                            " continuing anyway", device_idx, node, msg)
11303
        result.append(("disk/%d" % device_idx, "remove"))
11304

    
11305
        # if this is a DRBD disk, return its port to the pool
11306
        if device.dev_type in constants.LDS_DRBD:
11307
          tcp_port = device.logical_id[2]
11308
          self.cfg.AddTcpUdpPort(tcp_port)
11309
      elif disk_op == constants.DDM_ADD:
11310
        # add a new disk
11311
        if instance.disk_template in (constants.DT_FILE,
11312
                                        constants.DT_SHARED_FILE):
11313
          file_driver, file_path = instance.disks[0].logical_id
11314
          file_path = os.path.dirname(file_path)
11315
        else:
11316
          file_driver = file_path = None
11317
        disk_idx_base = len(instance.disks)
11318
        new_disk = _GenerateDiskTemplate(self,
11319
                                         instance.disk_template,
11320
                                         instance.name, instance.primary_node,
11321
                                         instance.secondary_nodes,
11322
                                         [disk_dict],
11323
                                         file_path,
11324
                                         file_driver,
11325
                                         disk_idx_base, feedback_fn)[0]
11326
        instance.disks.append(new_disk)
11327
        info = _GetInstanceInfoText(instance)
11328

    
11329
        logging.info("Creating volume %s for instance %s",
11330
                     new_disk.iv_name, instance.name)
11331
        # Note: this needs to be kept in sync with _CreateDisks
11332
        #HARDCODE
11333
        for node in instance.all_nodes:
11334
          f_create = node == instance.primary_node
11335
          try:
11336
            _CreateBlockDev(self, node, instance, new_disk,
11337
                            f_create, info, f_create)
11338
          except errors.OpExecError, err:
11339
            self.LogWarning("Failed to create volume %s (%s) on"
11340
                            " node %s: %s",
11341
                            new_disk.iv_name, new_disk, node, err)
11342
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11343
                       (new_disk.size, new_disk.mode)))
11344
      else:
11345
        # change a given disk
11346
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11347
        result.append(("disk.mode/%d" % disk_op,
11348
                       disk_dict[constants.IDISK_MODE]))
11349

    
11350
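    # Disk template conversion: shut down the instance's disks first, then
    # dispatch on the (old, new) template pair via _DISK_CONVERSIONS; if the
    # conversion fails, release any DRBD minors reserved for the new disks.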
    if self.op.disk_template:
11351
      r_shut = _ShutdownInstanceDisks(self, instance)
11352
      if not r_shut:
11353
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11354
                                 " proceed with disk template conversion")
11355
      mode = (instance.disk_template, self.op.disk_template)
11356
      try:
11357
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11358
      except:
11359
        self.cfg.ReleaseDRBDMinors(instance.name)
11360
        raise
11361
      result.append(("disk_template", self.op.disk_template))
11362

    
11363
    # NIC changes
11364
    for nic_op, nic_dict in self.op.nics:
11365
      if nic_op == constants.DDM_REMOVE:
11366
        # remove the last nic
11367
        del instance.nics[-1]
11368
        result.append(("nic.%d" % len(instance.nics), "remove"))
11369
      elif nic_op == constants.DDM_ADD:
11370
        # mac and bridge should be set, by now
11371
        mac = nic_dict[constants.INIC_MAC]
11372
        ip = nic_dict.get(constants.INIC_IP, None)
11373
        nicparams = self.nic_pinst[constants.DDM_ADD]
11374
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11375
        instance.nics.append(new_nic)
11376
        result.append(("nic.%d" % (len(instance.nics) - 1),
11377
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11378
                       (new_nic.mac, new_nic.ip,
11379
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11380
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11381
                       )))
11382
      else:
11383
        for key in (constants.INIC_MAC, constants.INIC_IP):
11384
          if key in nic_dict:
11385
            setattr(instance.nics[nic_op], key, nic_dict[key])
11386
        if nic_op in self.nic_pinst:
11387
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11388
        for key, val in nic_dict.iteritems():
11389
          result.append(("nic.%s/%d" % (key, nic_op), val))
11390

    
11391
    # hvparams changes
11392
    if self.op.hvparams:
11393
      instance.hvparams = self.hv_inst
11394
      for key, val in self.op.hvparams.iteritems():
11395
        result.append(("hv/%s" % key, val))
11396

    
11397
    # beparams changes
11398
    if self.op.beparams:
11399
      instance.beparams = self.be_inst
11400
      for key, val in self.op.beparams.iteritems():
11401
        result.append(("be/%s" % key, val))
11402

    
11403
    # OS change
11404
    if self.op.os_name:
11405
      instance.os = self.op.os_name
11406

    
11407
    # osparams changes
11408
    if self.op.osparams:
11409
      instance.osparams = self.os_inst
11410
      for key, val in self.op.osparams.iteritems():
11411
        result.append(("os/%s" % key, val))
11412

    
11413
    self.cfg.Update(instance, feedback_fn)
11414

    
11415
    return result
11416

    
11417
  _DISK_CONVERSIONS = {
11418
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11419
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11420
    }
11421

    
11422

    
11423
class LUInstanceChangeGroup(LogicalUnit):
11424
  HPATH = "instance-change-group"
11425
  HTYPE = constants.HTYPE_INSTANCE
11426
  REQ_BGL = False
11427

    
11428
  def ExpandNames(self):
11429
    self.share_locks = _ShareAll()
11430
    self.needed_locks = {
11431
      locking.LEVEL_NODEGROUP: [],
11432
      locking.LEVEL_NODE: [],
11433
      }
11434

    
11435
    self._ExpandAndLockInstance()
11436

    
11437
    if self.op.target_groups:
11438
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11439
                                  self.op.target_groups)
11440
    else:
11441
      self.req_target_uuids = None
11442

    
11443
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11444

    
11445
  def DeclareLocks(self, level):
11446
    if level == locking.LEVEL_NODEGROUP:
11447
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11448

    
11449
      if self.req_target_uuids:
11450
        lock_groups = set(self.req_target_uuids)
11451

    
11452
        # Lock all groups used by instance optimistically; this requires going
11453
        # via the node before it's locked, requiring verification later on
11454
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11455
        lock_groups.update(instance_groups)
11456
      else:
11457
        # No target groups, need to lock all of them
11458
        lock_groups = locking.ALL_SET
11459

    
11460
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11461

    
11462
    elif level == locking.LEVEL_NODE:
11463
      if self.req_target_uuids:
11464
        # Lock all nodes used by instances
11465
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11466
        self._LockInstancesNodes()
11467

    
11468
        # Lock all nodes in all potential target groups
11469
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11470
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11471
        member_nodes = [node_name
11472
                        for group in lock_groups
11473
                        for node_name in self.cfg.GetNodeGroup(group).members]
11474
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11475
      else:
11476
        # Lock all nodes as all groups are potential targets
11477
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11478

    
11479
  def CheckPrereq(self):
11480
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11481
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11482
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11483

    
11484
    assert (self.req_target_uuids is None or
11485
            owned_groups.issuperset(self.req_target_uuids))
11486
    assert owned_instances == set([self.op.instance_name])
11487

    
11488
    # Get instance information
11489
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11490

    
11491
    # Check if node groups for locked instance are still correct
11492
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11493
      ("Instance %s's nodes changed while we kept the lock" %
11494
       self.op.instance_name)
11495

    
11496
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11497
                                           owned_groups)
11498

    
11499
    if self.req_target_uuids:
11500
      # User requested specific target groups
11501
      self.target_uuids = self.req_target_uuids
11502
    else:
11503
      # All groups except those used by the instance are potential targets
11504
      self.target_uuids = owned_groups - inst_groups
11505

    
11506
    conflicting_groups = self.target_uuids & inst_groups
11507
    if conflicting_groups:
11508
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11509
                                 " used by the instance '%s'" %
11510
                                 (utils.CommaJoin(conflicting_groups),
11511
                                  self.op.instance_name),
11512
                                 errors.ECODE_INVAL)
11513

    
11514
    if not self.target_uuids:
11515
      raise errors.OpPrereqError("There are no possible target groups",
11516
                                 errors.ECODE_INVAL)
11517

    
11518
  def BuildHooksEnv(self):
11519
    """Build hooks env.
11520

11521
    """
11522
    assert self.target_uuids
11523

    
11524
    env = {
11525
      "TARGET_GROUPS": " ".join(self.target_uuids),
11526
      }
11527

    
11528
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11529

    
11530
    return env
11531

    
11532
  def BuildHooksNodes(self):
11533
    """Build hooks nodes.
11534

11535
    """
11536
    mn = self.cfg.GetMasterNode()
11537
    return ([mn], [mn])
11538

    
11539
  def Exec(self, feedback_fn):
11540
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11541

    
11542
    assert instances == [self.op.instance_name], "Instance not locked"
11543

    
11544
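    # Ask the iallocator (change-group mode) where to move the instance; its
    # answer is turned into the jobs that will actually do the relocation.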
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11545
                     instances=instances, target_groups=list(self.target_uuids))
11546

    
11547
    ial.Run(self.op.iallocator)
11548

    
11549
    if not ial.success:
11550
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11551
                                 " instance '%s' using iallocator '%s': %s" %
11552
                                 (self.op.instance_name, self.op.iallocator,
11553
                                  ial.info),
11554
                                 errors.ECODE_NORES)
11555

    
11556
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11557

    
11558
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11559
                 " instance '%s'", len(jobs), self.op.instance_name)
11560

    
11561
    return ResultWithJobs(jobs)
11562

    
11563

    
11564
class LUBackupQuery(NoHooksLU):
11565
  """Query the exports list
11566

11567
  """
11568
  REQ_BGL = False
11569

    
11570
  def ExpandNames(self):
11571
    self.needed_locks = {}
11572
    self.share_locks[locking.LEVEL_NODE] = 1
11573
    if not self.op.nodes:
11574
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11575
    else:
11576
      self.needed_locks[locking.LEVEL_NODE] = \
11577
        _GetWantedNodes(self, self.op.nodes)
11578

    
11579
  def Exec(self, feedback_fn):
11580
    """Compute the list of all the exported system images.
11581

11582
    @rtype: dict
11583
    @return: a dictionary with the structure node->(export-list)
11584
        where export-list is a list of the instances exported on
11585
        that node.
11586

11587
    """
11588
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11589
    rpcresult = self.rpc.call_export_list(self.nodes)
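    # Nodes that fail to answer are reported as False instead of an export
    # list, so a failed query can be told apart from a node with no exports.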
11590
    result = {}
11591
    for node in rpcresult:
11592
      if rpcresult[node].fail_msg:
11593
        result[node] = False
11594
      else:
11595
        result[node] = rpcresult[node].payload
11596

    
11597
    return result
11598

    
11599

    
11600
class LUBackupPrepare(NoHooksLU):
11601
  """Prepares an instance for an export and returns useful information.
11602

11603
  """
11604
  REQ_BGL = False
11605

    
11606
  def ExpandNames(self):
11607
    self._ExpandAndLockInstance()
11608

    
11609
  def CheckPrereq(self):
11610
    """Check prerequisites.
11611

11612
    """
11613
    instance_name = self.op.instance_name
11614

    
11615
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11616
    assert self.instance is not None, \
11617
          "Cannot retrieve locked instance %s" % self.op.instance_name
11618
    _CheckNodeOnline(self, self.instance.primary_node)
11619

    
11620
    self._cds = _GetClusterDomainSecret()
11621

    
11622
  def Exec(self, feedback_fn):
11623
    """Prepares an instance for an export.
11624

11625
    """
11626
    instance = self.instance
11627

    
11628
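    # For remote exports, create a temporary X509 certificate on the primary
    # node and return it (signed with the cluster domain secret) together
    # with the handshake message and an HMAC over the certificate's key name.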
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11629
      salt = utils.GenerateSecret(8)
11630

    
11631
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11632
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11633
                                              constants.RIE_CERT_VALIDITY)
11634
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11635

    
11636
      (name, cert_pem) = result.payload
11637

    
11638
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11639
                                             cert_pem)
11640

    
11641
      return {
11642
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11643
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11644
                          salt),
11645
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11646
        }
11647

    
11648
    return None
11649

    
11650

    
11651
class LUBackupExport(LogicalUnit):
11652
  """Export an instance to an image in the cluster.
11653

11654
  """
11655
  HPATH = "instance-export"
11656
  HTYPE = constants.HTYPE_INSTANCE
11657
  REQ_BGL = False
11658

    
11659
  def CheckArguments(self):
11660
    """Check the arguments.
11661

11662
    """
11663
    self.x509_key_name = self.op.x509_key_name
11664
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11665

    
11666
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11667
      if not self.x509_key_name:
11668
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11669
                                   errors.ECODE_INVAL)
11670

    
11671
      if not self.dest_x509_ca_pem:
11672
        raise errors.OpPrereqError("Missing destination X509 CA",
11673
                                   errors.ECODE_INVAL)
11674

    
11675
  def ExpandNames(self):
11676
    self._ExpandAndLockInstance()
11677

    
11678
    # Lock all nodes for local exports
11679
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11680
      # FIXME: lock only instance primary and destination node
11681
      #
11682
      # Sad but true, for now we have to lock all nodes, as we don't know where
11683
      # the previous export might be, and in this LU we search for it and
11684
      # remove it from its current node. In the future we could fix this by:
11685
      #  - making a tasklet to search (share-lock all), then create the
11686
      #    new one, then one to remove, after
11687
      #  - removing the removal operation altogether
11688
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11689

    
11690
  def DeclareLocks(self, level):
11691
    """Last minute lock declaration."""
11692
    # All nodes are locked anyway, so nothing to do here.
11693

    
11694
  def BuildHooksEnv(self):
11695
    """Build hooks env.
11696

11697
    This will run on the master, primary node and target node.
11698

11699
    """
11700
    env = {
11701
      "EXPORT_MODE": self.op.mode,
11702
      "EXPORT_NODE": self.op.target_node,
11703
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11704
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11705
      # TODO: Generic function for boolean env variables
11706
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11707
      }
11708

    
11709
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11710

    
11711
    return env
11712

    
11713
  def BuildHooksNodes(self):
11714
    """Build hooks nodes.
11715

11716
    """
11717
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11718

    
11719
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11720
      nl.append(self.op.target_node)
11721

    
11722
    return (nl, nl)
11723

    
11724
  def CheckPrereq(self):
11725
    """Check prerequisites.
11726

11727
    This checks that the instance and node names are valid.
11728

11729
    """
11730
    instance_name = self.op.instance_name
11731

    
11732
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11733
    assert self.instance is not None, \
11734
          "Cannot retrieve locked instance %s" % self.op.instance_name
11735
    _CheckNodeOnline(self, self.instance.primary_node)
11736

    
11737
    if (self.op.remove_instance and self.instance.admin_up and
11738
        not self.op.shutdown):
11739
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
11740
                                 " down first", errors.ECODE_INVAL)
11741

    
11742
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11743
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11744
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11745
      assert self.dst_node is not None
11746

    
11747
      _CheckNodeOnline(self, self.dst_node.name)
11748
      _CheckNodeNotDrained(self, self.dst_node.name)
11749

    
11750
      self._cds = None
11751
      self.dest_disk_info = None
11752
      self.dest_x509_ca = None
11753

    
11754
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11755
      self.dst_node = None
11756

    
11757
      if len(self.op.target_node) != len(self.instance.disks):
11758
        raise errors.OpPrereqError(("Received destination information for %s"
11759
                                    " disks, but instance %s has %s disks") %
11760
                                   (len(self.op.target_node), instance_name,
11761
                                    len(self.instance.disks)),
11762
                                   errors.ECODE_INVAL)
11763

    
11764
      cds = _GetClusterDomainSecret()
11765

    
11766
      # Check X509 key name
11767
      try:
11768
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11769
      except (TypeError, ValueError), err:
11770
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11771

    
11772
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11773
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11774
                                   errors.ECODE_INVAL)
11775

    
11776
      # Load and verify CA
11777
      try:
11778
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11779
      except OpenSSL.crypto.Error, err:
11780
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11781
                                   (err, ), errors.ECODE_INVAL)
11782

    
11783
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11784
      if errcode is not None:
11785
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11786
                                   (msg, ), errors.ECODE_INVAL)
11787

    
11788
      self.dest_x509_ca = cert
11789

    
11790
      # Verify target information
11791
      disk_info = []
11792
      for idx, disk_data in enumerate(self.op.target_node):
11793
        try:
11794
          (host, port, magic) = \
11795
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11796
        except errors.GenericError, err:
11797
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11798
                                     (idx, err), errors.ECODE_INVAL)
11799

    
11800
        disk_info.append((host, port, magic))
11801

    
11802
      assert len(disk_info) == len(self.op.target_node)
11803
      self.dest_disk_info = disk_info
11804

    
11805
    else:
11806
      raise errors.ProgrammerError("Unhandled export mode %r" %
11807
                                   self.op.mode)
11808

    
11809
    # instance disk type verification
11810
    # TODO: Implement export support for file-based disks
11811
    for disk in self.instance.disks:
11812
      if disk.dev_type == constants.LD_FILE:
11813
        raise errors.OpPrereqError("Export not supported for instances with"
11814
                                   " file-based disks", errors.ECODE_INVAL)
11815

    
11816
  def _CleanupExports(self, feedback_fn):
11817
    """Removes exports of current instance from all other nodes.
11818

11819
    If an instance in a cluster with nodes A..D was exported to node C, its
11820
    exports will be removed from the nodes A, B and D.
11821

11822
    """
11823
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11824

    
11825
    nodelist = self.cfg.GetNodeList()
11826
    nodelist.remove(self.dst_node.name)
11827

    
11828
    # on one-node clusters nodelist will be empty after the removal
11829
    # if we proceed the backup would be removed because OpBackupQuery
11830
    # substitutes an empty list with the full cluster node list.
11831
    iname = self.instance.name
11832
    if nodelist:
11833
      feedback_fn("Removing old exports for instance %s" % iname)
11834
      exportlist = self.rpc.call_export_list(nodelist)
11835
      for node in exportlist:
11836
        if exportlist[node].fail_msg:
11837
          continue
11838
        if iname in exportlist[node].payload:
11839
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11840
          if msg:
11841
            self.LogWarning("Could not remove older export for instance %s"
11842
                            " on node %s: %s", iname, node, msg)
11843

    
11844
  def Exec(self, feedback_fn):
11845
    """Export an instance to an image in the cluster.
11846

11847
    """
11848
    assert self.op.mode in constants.EXPORT_MODES
11849

    
11850
    instance = self.instance
11851
    src_node = instance.primary_node
11852

    
11853
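    # If a shutdown was requested, stop the instance (its disks stay active)
    # so the snapshots are taken in a consistent state; a stopped instance
    # instead gets its disks activated temporarily for the export.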
    if self.op.shutdown:
11854
      # shutdown the instance, but not the disks
11855
      feedback_fn("Shutting down instance %s" % instance.name)
11856
      result = self.rpc.call_instance_shutdown(src_node, instance,
11857
                                               self.op.shutdown_timeout)
11858
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11859
      result.Raise("Could not shutdown instance %s on"
11860
                   " node %s" % (instance.name, src_node))
11861

    
11862
    # set the disks ID correctly since call_instance_start needs the
11863
    # correct drbd minor to create the symlinks
11864
    for disk in instance.disks:
11865
      self.cfg.SetDiskID(disk, src_node)
11866

    
11867
    activate_disks = (not instance.admin_up)
11868

    
11869
    if activate_disks:
11870
      # Activate the instance disks if we're exporting a stopped instance
11871
      feedback_fn("Activating disks for %s" % instance.name)
11872
      _StartInstanceDisks(self, instance, None)
11873

    
11874
    try:
11875
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11876
                                                     instance)
11877

    
11878
      helper.CreateSnapshots()
11879
      try:
11880
        if (self.op.shutdown and instance.admin_up and
11881
            not self.op.remove_instance):
11882
          assert not activate_disks
11883
          feedback_fn("Starting instance %s" % instance.name)
11884
          result = self.rpc.call_instance_start(src_node, instance,
11885
                                                None, None, False)
11886
          msg = result.fail_msg
11887
          if msg:
11888
            feedback_fn("Failed to start instance: %s" % msg)
11889
            _ShutdownInstanceDisks(self, instance)
11890
            raise errors.OpExecError("Could not start instance: %s" % msg)
11891

    
11892
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11893
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11894
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11895
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11896
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11897

    
11898
          (key_name, _, _) = self.x509_key_name
11899

    
11900
          dest_ca_pem = \
11901
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11902
                                            self.dest_x509_ca)
11903

    
11904
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11905
                                                     key_name, dest_ca_pem,
11906
                                                     timeouts)
11907
      finally:
11908
        helper.Cleanup()
11909

    
11910
      # Check for backwards compatibility
11911
      assert len(dresults) == len(instance.disks)
11912
      assert compat.all(isinstance(i, bool) for i in dresults), \
11913
             "Not all results are boolean: %r" % dresults
11914

    
11915
    finally:
11916
      if activate_disks:
11917
        feedback_fn("Deactivating disks for %s" % instance.name)
11918
        _ShutdownInstanceDisks(self, instance)
11919

    
11920
    if not (compat.all(dresults) and fin_resu):
11921
      failures = []
11922
      if not fin_resu:
11923
        failures.append("export finalization")
11924
      if not compat.all(dresults):
11925
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11926
                               if not dsk)
11927
        failures.append("disk export: disk(s) %s" % fdsk)
11928

    
11929
      raise errors.OpExecError("Export failed, errors in %s" %
11930
                               utils.CommaJoin(failures))
11931

    
11932
    # At this point, the export was successful, we can cleanup/finish
11933

    
11934
    # Remove instance if requested
11935
    if self.op.remove_instance:
11936
      feedback_fn("Removing instance %s" % instance.name)
11937
      _RemoveInstance(self, feedback_fn, instance,
11938
                      self.op.ignore_remove_failures)
11939

    
11940
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11941
      self._CleanupExports(feedback_fn)
11942

    
11943
    return fin_resu, dresults
11944

    
11945

    
11946
class LUBackupRemove(NoHooksLU):
11947
  """Remove exports related to the named instance.
11948

11949
  """
11950
  REQ_BGL = False
11951

    
11952
  def ExpandNames(self):
11953
    self.needed_locks = {}
11954
    # We need all nodes to be locked in order for RemoveExport to work, but we
11955
    # don't need to lock the instance itself, as nothing will happen to it (and
11956
    # we can remove exports also for a removed instance)
11957
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11958

    
11959
  def Exec(self, feedback_fn):
11960
    """Remove any export.
11961

11962
    """
11963
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11964
    # If the instance was not found we'll try with the name that was passed in.
11965
    # This will only work if it was an FQDN, though.
11966
    fqdn_warn = False
11967
    if not instance_name:
11968
      fqdn_warn = True
11969
      instance_name = self.op.instance_name
11970

    
11971
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11972
    exportlist = self.rpc.call_export_list(locked_nodes)
11973
    found = False
11974
    for node in exportlist:
11975
      msg = exportlist[node].fail_msg
11976
      if msg:
11977
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11978
        continue
11979
      if instance_name in exportlist[node].payload:
11980
        found = True
11981
        result = self.rpc.call_export_remove(node, instance_name)
11982
        msg = result.fail_msg
11983
        if msg:
11984
          logging.error("Could not remove export for instance %s"
11985
                        " on node %s: %s", instance_name, node, msg)
11986

    
11987
    if fqdn_warn and not found:
11988
      feedback_fn("Export not found. If trying to remove an export belonging"
11989
                  " to a deleted instance, please use its Fully Qualified"
11990
                  " Domain Name.")
11991

    
11992

    
11993
class LUGroupAdd(LogicalUnit):
11994
  """Logical unit for creating node groups.
11995

11996
  """
11997
  HPATH = "group-add"
11998
  HTYPE = constants.HTYPE_GROUP
11999
  REQ_BGL = False
12000

    
12001
  def ExpandNames(self):
12002
    # We need the new group's UUID here so that we can create and acquire the
12003
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12004
    # that it should not check whether the UUID exists in the configuration.
12005
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12006
    self.needed_locks = {}
12007
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12008

    
12009
  def CheckPrereq(self):
12010
    """Check prerequisites.
12011

12012
    This checks that the given group name is not an existing node group
12013
    already.
12014

12015
    """
12016
    try:
12017
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12018
    except errors.OpPrereqError:
12019
      pass
12020
    else:
12021
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12022
                                 " node group (UUID: %s)" %
12023
                                 (self.op.group_name, existing_uuid),
12024
                                 errors.ECODE_EXISTS)
12025

    
12026
    if self.op.ndparams:
12027
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12028

    
12029
  def BuildHooksEnv(self):
12030
    """Build hooks env.
12031

12032
    """
12033
    return {
12034
      "GROUP_NAME": self.op.group_name,
12035
      }
12036

    
12037
  def BuildHooksNodes(self):
12038
    """Build hooks nodes.
12039

12040
    """
12041
    mn = self.cfg.GetMasterNode()
12042
    return ([mn], [mn])
12043

    
12044
  def Exec(self, feedback_fn):
12045
    """Add the node group to the cluster.
12046

12047
    """
12048
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12049
                                  uuid=self.group_uuid,
12050
                                  alloc_policy=self.op.alloc_policy,
12051
                                  ndparams=self.op.ndparams)
12052

    
12053
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12054
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12055

    
12056

    
12057
class LUGroupAssignNodes(NoHooksLU):
12058
  """Logical unit for assigning nodes to groups.
12059

12060
  """
12061
  REQ_BGL = False
12062

    
12063
  def ExpandNames(self):
12064
    # These raise errors.OpPrereqError on their own:
12065
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12066
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12067

    
12068
    # We want to lock all the affected nodes and groups. We have readily
12069
    # available the list of nodes, and the *destination* group. To gather the
12070
    # list of "source" groups, we need to fetch node information later on.
12071
    self.needed_locks = {
12072
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12073
      locking.LEVEL_NODE: self.op.nodes,
12074
      }
12075

    
12076
  def DeclareLocks(self, level):
12077
    if level == locking.LEVEL_NODEGROUP:
12078
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12079

    
12080
      # Try to get all affected nodes' groups without having the group or node
12081
      # lock yet. Needs verification later in the code flow.
12082
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12083

    
12084
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12085

    
12086
  def CheckPrereq(self):
12087
    """Check prerequisites.
12088

12089
    """
12090
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12091
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12092
            frozenset(self.op.nodes))
12093

    
12094
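    # The node groups were locked before the node locks were acquired, so
    # group membership may have changed in between; recompute and compare.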
    expected_locks = (set([self.group_uuid]) |
12095
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12096
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12097
    if actual_locks != expected_locks:
12098
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12099
                               " current groups are '%s', used to be '%s'" %
12100
                               (utils.CommaJoin(expected_locks),
12101
                                utils.CommaJoin(actual_locks)))
12102

    
12103
    self.node_data = self.cfg.GetAllNodesInfo()
12104
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12105
    instance_data = self.cfg.GetAllInstancesInfo()
12106

    
12107
    if self.group is None:
12108
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12109
                               (self.op.group_name, self.group_uuid))
12110

    
12111
    (new_splits, previous_splits) = \
12112
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12113
                                             for node in self.op.nodes],
12114
                                            self.node_data, instance_data)
12115

    
12116
    if new_splits:
12117
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12118

    
12119
      if not self.op.force:
12120
        raise errors.OpExecError("The following instances get split by this"
12121
                                 " change and --force was not given: %s" %
12122
                                 fmt_new_splits)
12123
      else:
12124
        self.LogWarning("This operation will split the following instances: %s",
12125
                        fmt_new_splits)
12126

    
12127
        if previous_splits:
12128
          self.LogWarning("In addition, these already-split instances continue"
12129
                          " to be split across groups: %s",
12130
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12131

    
12132
  def Exec(self, feedback_fn):
12133
    """Assign nodes to a new group.
12134

12135
    """
12136
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12137

    
12138
    self.cfg.AssignGroupNodes(mods)
12139

    
12140
  @staticmethod
12141
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12142
    """Check for split instances after a node assignment.
12143

12144
    This method considers a series of node assignments as an atomic operation,
12145
    and returns information about split instances after applying the set of
12146
    changes.
12147

12148
    In particular, it returns information about newly split instances, and
12149
    instances that were already split, and remain so after the change.
12150

12151
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12152
    considered.
12153

12154
    @type changes: list of (node_name, new_group_uuid) pairs.
12155
    @param changes: list of node assignments to consider.
12156
    @param node_data: a dict with data for all nodes
12157
    @param instance_data: a dict with all instances to consider
12158
    @rtype: a two-tuple
12159
    @return: a list of instances that were previously okay and result split as a
12160
      consequence of this change, and a list of instances that were previously
12161
      split and this change does not fix.
12162

12163
    """
12164
    changed_nodes = dict((node, group) for node, group in changes
12165
                         if node_data[node].group != group)
12166

    
12167
    all_split_instances = set()
12168
    previously_split_instances = set()
12169

    
12170
    def InstanceNodes(instance):
12171
      return [instance.primary_node] + list(instance.secondary_nodes)
12172

    
12173
    for inst in instance_data.values():
12174
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12175
        continue
12176

    
12177
      instance_nodes = InstanceNodes(inst)
12178

    
12179
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12180
        previously_split_instances.add(inst.name)
12181

    
12182
      if len(set(changed_nodes.get(node, node_data[node].group)
12183
                 for node in instance_nodes)) > 1:
12184
        all_split_instances.add(inst.name)
12185

    
12186
    return (list(all_split_instances - previously_split_instances),
12187
            list(previously_split_instances & all_split_instances))
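
  # Illustrative sketch (not part of the original module, names are
  # hypothetical): with a DRBD instance "inst1" on node1 and node2, both
  # currently in group A, moving only node2 to group B makes the instance
  # newly split; instances already spread over two groups would show up in
  # the second list instead:
  #
  #   changes = [("node2", "uuid-of-group-B")]
  #   (new, old) = LUGroupAssignNodes.CheckAssignmentForSplitInstances(
  #       changes, node_data, instance_data)
  #   # new == ["inst1"], old == []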


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)
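
  # Illustrative sketch (not part of the original module, names are
  # hypothetical): for a cluster with one queried group, the two mappings
  # built above have this shape; only an instance's primary node decides
  # which group it is counted under here:
  #
  #   group_to_nodes == {"uuid-A": ["node1", "node2"]}
  #   group_to_instances == {"uuid-A": ["inst1", "inst2"]}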


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams
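
  # Illustrative sketch (not part of the original module, values are
  # hypothetical): _GetUpdatedParams merges the submitted overrides on top of
  # the currently stored ndparams, so keys not mentioned in the opcode are
  # kept unchanged:
  #
  #   group.ndparams == {"oob_program": "/bin/old"}
  #   op.ndparams == {"oob_program": "/bin/new"}
  #   _GetUpdatedParams(group.ndparams, op.ndparams)
  #   # -> {"oob_program": "/bin/new"}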

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    _CheckInstancesNodeGroups(self.cfg, self.instances,
                              owned_groups, owned_nodes, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)
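
  # Illustrative sketch (not part of the original module): the iallocator's
  # change-group answer is turned into job definitions by _LoadNodeEvacResult,
  # and ResultWithJobs hands them to the job queue so the job IDs end up in
  # the opcode result; a successful run might look roughly like:
  #
  #   jobs == [[<OpInstanceMigrate ...>], [<OpInstanceReplaceDisks ...>]]
  #   return ResultWithJobs(jobs)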


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the list of (path, tag) pairs matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
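
  # Illustrative sketch (not part of the original module, tags and names are
  # hypothetical): with a cluster tag "ha:yes" and an instance tag "ha:no",
  # searching for the pattern "^ha:" would return both matches together with
  # the path of their owner:
  #
  #   [("/cluster", "ha:yes"), ("/instances/inst1", "ha:no")]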


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
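
  # Illustrative sketch (not part of the original module, parameter values
  # are hypothetical): repeat=0 runs the delay exactly once without logging
  # iterations, while repeat=3 runs it three times and logs
  # "Test delay iteration 0/2", "1/2" and "2/2"; e.g. an opcode with
  # duration=2.0, on_master=True, repeat=0 sleeps once for two seconds.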


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
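
  # Illustrative sketch (not part of the original module): the peer that
  # receives the socket path through the callback is expected to connect
  # within _CLIENT_CONNECT_TIMEOUT and then send a byte or close the
  # connection within _CLIENT_CONFIRM_TIMEOUT, roughly:
  #
  #   import socket
  #   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   s.connect(sockname)   # sockname as delivered by the callback
  #   s.send("x")
  #   s.close()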

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has the following sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
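
  # Illustrative sketch (not part of the original module, names are
  # hypothetical): the "nodegroups" entry of the allocator input maps group
  # UUIDs to small dicts, e.g. for a single-group cluster:
  #
  #   {"uuid-A": {"name": "default", "alloc_policy": "preferred"}}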

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
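
  # Illustrative sketch (not part of the original module, names are
  # hypothetical): a node-evacuate or change-group result accepted by
  # _NEVAC_RESULT is a (moved, failed, jobs) triple, roughly:
  #
  #   (
  #     [("inst1", "uuid-B", ["node3", "node4"])],   # moved instances
  #     [("inst2", "disks are not mirrored")],       # failed instances
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]],   # one opcode list per job
  #   )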

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
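
  # Illustrative sketch (not part of the original module, names are
  # hypothetical): unknown nodes are skipped, and groups that cannot be
  # resolved fall back to their UUID:
  #
  #   _NodesToGroups({"node1": "uuid-A"}, {"uuid-A": {"name": "default"}},
  #                  ["node1", "node9"])
  #   # -> ["default"]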


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
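
# Illustrative sketch (not part of the original module): callers resolve the
# query class for an opcode-level resource and instantiate it, roughly as
# LUGroupQuery does above:
#
#   impl = _GetQueryImplementation(constants.QR_GROUP)   # -> _GroupQuery
#   query_obj = impl(qlang.MakeSimpleFilter("name", names), fields, False)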