root / lib / cmdlib.py @ 566db1f2


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import logging
36
import copy
37
import OpenSSL
38
import socket
39
import tempfile
40
import shutil
41
import itertools
42
import operator
43

    
44
from ganeti import ssh
45
from ganeti import utils
46
from ganeti import errors
47
from ganeti import hypervisor
48
from ganeti import locking
49
from ganeti import constants
50
from ganeti import objects
51
from ganeti import serializer
52
from ganeti import ssconf
53
from ganeti import uidpool
54
from ganeti import compat
55
from ganeti import masterd
56
from ganeti import netutils
57
from ganeti import query
58
from ganeti import qlang
59
from ganeti import opcodes
60
from ganeti import ht
61
from ganeti import runtime
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
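  # Illustrative sketch only (not part of the class): an LU whose Exec wants
  # the master daemon to submit follow-up jobs can return an instance of this
  # class. The opcode choice and the extra keyword below are made up for the
  # example; mcpu.Processor._ProcessResult submits the jobs from C{jobs} and
  # merges the keyword arguments into the opcode result.
  #
  #   def Exec(self, feedback_fn):
  #     ops = [opcodes.OpClusterVerifyConfig()]
  #     return ResultWithJobs([ops], requested_verify=True)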
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
137
    self.LogStep = processor.LogStep # pylint: disable=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensuring the
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separately is better because:
160

161
      - ExpandNames is left as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possibly
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods no longer need to worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. No nodes should be returned as an
297
      empty list (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged but any LU can define it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the unused argument and "could
324
    # be a function" warnings
325
    # pylint: disable=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
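  # A typical caller looks like the following sketch (assuming the LU's opcode
  # has an instance_name field and node locks are recalculated later):
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE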
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instance's nodes, or
359
    to just lock primaries or secondary nodes, if needed.
360

361
    It should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we've really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
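  # Sketch of how an LU plugs in tasklets (the names below are illustrative):
  # the tasklets are created in ExpandNames, and the LogicalUnit base class
  # then runs their CheckPrereq/Exec in order instead of the LU's own methods.
  #
  #   def ExpandNames(self):
  #     ...
  #     self.tasklets = [SomeTasklet(self, name) for name in self.op.names]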
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
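# Usage sketch: an LU that only reads the configuration can declare every lock
# level it acquires as shared, e.g. in its ExpandNames:
#
#   self.share_locks = _ShareAll()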
558

    
559

    
560
def _CheckInstancesNodeGroups(cfg, instances, owned_groups, owned_nodes,
561
                              cur_group_uuid):
562
  """Checks if node groups for locked instances are still correct.
563

564
  @type cfg: L{config.ConfigWriter}
565
  @param cfg: Cluster configuration
566
  @type instances: dict; string as key, L{objects.Instance} as value
567
  @param instances: Dictionary, instance name as key, instance object as value
568
  @type owned_groups: iterable of string
569
  @param owned_groups: List of owned groups
570
  @type owned_nodes: iterable of string
571
  @param owned_nodes: List of owned nodes
572
  @type cur_group_uuid: string or None
573
  @param cur_group_uuid: Optional group UUID to check against instance's groups
574

575
  """
576
  for (name, inst) in instances.items():
577
    assert owned_nodes.issuperset(inst.all_nodes), \
578
      "Instance %s's nodes changed while we kept the lock" % name
579

    
580
    inst_groups = _CheckInstanceNodeGroups(cfg, name, owned_groups)
581

    
582
    assert cur_group_uuid is None or cur_group_uuid in inst_groups, \
583
      "Instance %s has no node in group %s" % (name, cur_group_uuid)
584

    
585

    
586
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
587
  """Checks if the owned node groups are still correct for an instance.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type instance_name: string
592
  @param instance_name: Instance name
593
  @type owned_groups: set or frozenset
594
  @param owned_groups: List of currently owned node groups
595

596
  """
597
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
598

    
599
  if not owned_groups.issuperset(inst_groups):
600
    raise errors.OpPrereqError("Instance %s's node groups changed since"
601
                               " locks were acquired, current groups are"
602
                               " are '%s', owning groups '%s'; retry the"
603
                               " operation" %
604
                               (instance_name,
605
                                utils.CommaJoin(inst_groups),
606
                                utils.CommaJoin(owned_groups)),
607
                               errors.ECODE_STATE)
608

    
609
  return inst_groups
610

    
611

    
612
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
613
  """Checks if the instances in a node group are still correct.
614

615
  @type cfg: L{config.ConfigWriter}
616
  @param cfg: The cluster configuration
617
  @type group_uuid: string
618
  @param group_uuid: Node group UUID
619
  @type owned_instances: set or frozenset
620
  @param owned_instances: List of currently owned instances
621

622
  """
623
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
624
  if owned_instances != wanted_instances:
625
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
626
                               " locks were acquired, wanted '%s', have '%s';"
627
                               " retry the operation" %
628
                               (group_uuid,
629
                                utils.CommaJoin(wanted_instances),
630
                                utils.CommaJoin(owned_instances)),
631
                               errors.ECODE_STATE)
632

    
633
  return wanted_instances
634

    
635

    
636
def _SupportsOob(cfg, node):
637
  """Tells if node supports OOB.
638

639
  @type cfg: L{config.ConfigWriter}
640
  @param cfg: The cluster configuration
641
  @type node: L{objects.Node}
642
  @param node: The node
643
  @return: The OOB script if supported or an empty string otherwise
644

645
  """
646
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
647

    
648

    
649
def _GetWantedNodes(lu, nodes):
650
  """Returns list of checked and expanded node names.
651

652
  @type lu: L{LogicalUnit}
653
  @param lu: the logical unit on whose behalf we execute
654
  @type nodes: list
655
  @param nodes: list of node names or None for all nodes
656
  @rtype: list
657
  @return: the list of nodes, sorted
658
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
659

660
  """
661
  if nodes:
662
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
663

    
664
  return utils.NiceSort(lu.cfg.GetNodeList())
665

    
666

    
667
def _GetWantedInstances(lu, instances):
668
  """Returns list of checked and expanded instance names.
669

670
  @type lu: L{LogicalUnit}
671
  @param lu: the logical unit on whose behalf we execute
672
  @type instances: list
673
  @param instances: list of instance names or None for all instances
674
  @rtype: list
675
  @return: the list of instances, sorted
676
  @raise errors.OpPrereqError: if the instances parameter is wrong type
677
  @raise errors.OpPrereqError: if any of the passed instances is not found
678

679
  """
680
  if instances:
681
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
682
  else:
683
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
684
  return wanted
685

    
686

    
687
def _GetUpdatedParams(old_params, update_dict,
688
                      use_default=True, use_none=False):
689
  """Return the new version of a parameter dictionary.
690

691
  @type old_params: dict
692
  @param old_params: old parameters
693
  @type update_dict: dict
694
  @param update_dict: dict containing new parameter values, or
695
      constants.VALUE_DEFAULT to reset the parameter to its default
696
      value
697
  @type use_default: boolean
698
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
699
      values as 'to be deleted' values
700
  @type use_none: boolean
701
  @param use_none: whether to recognise C{None} values as 'to be
702
      deleted' values
703
  @rtype: dict
704
  @return: the new parameter dictionary
705

706
  """
707
  params_copy = copy.deepcopy(old_params)
708
  for key, val in update_dict.iteritems():
709
    if ((use_default and val == constants.VALUE_DEFAULT) or
710
        (use_none and val is None)):
711
      try:
712
        del params_copy[key]
713
      except KeyError:
714
        pass
715
    else:
716
      params_copy[key] = val
717
  return params_copy
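# Worked examples (parameter names and values are illustrative):
#
#   _GetUpdatedParams({"a": 1, "b": 2}, {"a": constants.VALUE_DEFAULT, "c": 3})
#     -> {"b": 2, "c": 3}    # "a" reverts to its default by being removed
#
#   _GetUpdatedParams({"a": 1}, {"a": None}, use_none=True)
#     -> {}                  # None removes the key when use_none is enabled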
718

    
719

    
720
def _ReleaseLocks(lu, level, names=None, keep=None):
721
  """Releases locks owned by an LU.
722

723
  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose locks should be released
724
  @type level: member of ganeti.locking.LEVELS
  @param level: Lock level
725
  @type names: list or None
726
  @param names: Names of locks to release
727
  @type keep: list or None
728
  @param keep: Names of locks to retain
729

730
  """
731
  assert not (keep is not None and names is not None), \
732
         "Only one of the 'names' and the 'keep' parameters can be given"
733

    
734
  if names is not None:
735
    should_release = names.__contains__
736
  elif keep:
737
    should_release = lambda name: name not in keep
738
  else:
739
    should_release = None
740

    
741
  if should_release:
742
    retain = []
743
    release = []
744

    
745
    # Determine which locks to release
746
    for name in lu.owned_locks(level):
747
      if should_release(name):
748
        release.append(name)
749
      else:
750
        retain.append(name)
751

    
752
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
753

    
754
    # Release just some locks
755
    lu.glm.release(level, names=release)
756

    
757
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
758
  else:
759
    # Release everything
760
    lu.glm.release(level)
761

    
762
    assert not lu.glm.is_owned(level), "No locks should be owned"
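# Usage sketch: after narrowing down the nodes an LU really needs, keep those
# locks and release the rest (the lock names here are illustrative):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[instance.primary_node] + list(instance.secondary_nodes))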
763

    
764

    
765
def _MapInstanceDisksToNodes(instances):
766
  """Creates a map from (node, volume) to instance name.
767

768
  @type instances: list of L{objects.Instance}
769
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
770

771
  """
772
  return dict(((node, vol), inst.name)
773
              for inst in instances
774
              for (node, vols) in inst.MapLVsByNode().items()
775
              for vol in vols)
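# Example of the resulting mapping (node, volume and instance names made up):
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}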
776

    
777

    
778
def _RunPostHook(lu, node_name):
779
  """Runs the post-hook for an opcode on a single node.
780

781
  """
782
  hm = lu.proc.BuildHooksManager(lu)
783
  try:
784
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
785
  except:
786
    # pylint: disable=W0702
787
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
788

    
789

    
790
def _CheckOutputFields(static, dynamic, selected):
791
  """Checks whether all selected fields are valid.
792

793
  @type static: L{utils.FieldSet}
794
  @param static: static fields set
795
  @type dynamic: L{utils.FieldSet}
796
  @param dynamic: dynamic fields set
797

798
  """
799
  f = utils.FieldSet()
800
  f.Extend(static)
801
  f.Extend(dynamic)
802

    
803
  delta = f.NonMatching(selected)
804
  if delta:
805
    raise errors.OpPrereqError("Unknown output fields selected: %s"
806
                               % ",".join(delta), errors.ECODE_INVAL)
807

    
808

    
809
def _CheckGlobalHvParams(params):
810
  """Validates that given hypervisor params are not global ones.
811

812
  This will ensure that instances don't get customised versions of
813
  global params.
814

815
  """
816
  used_globals = constants.HVC_GLOBALS.intersection(params)
817
  if used_globals:
818
    msg = ("The following hypervisor parameters are global and cannot"
819
           " be customized at instance level, please modify them at"
820
           " cluster level: %s" % utils.CommaJoin(used_globals))
821
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
822

    
823

    
824
def _CheckNodeOnline(lu, node, msg=None):
825
  """Ensure that a given node is online.
826

827
  @param lu: the LU on behalf of which we make the check
828
  @param node: the node to check
829
  @param msg: if passed, should be a message to replace the default one
830
  @raise errors.OpPrereqError: if the node is offline
831

832
  """
833
  if msg is None:
834
    msg = "Can't use offline node"
835
  if lu.cfg.GetNodeInfo(node).offline:
836
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeNotDrained(lu, node):
840
  """Ensure that a given node is not drained.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @raise errors.OpPrereqError: if the node is drained
845

846
  """
847
  if lu.cfg.GetNodeInfo(node).drained:
848
    raise errors.OpPrereqError("Can't use drained node %s" % node,
849
                               errors.ECODE_STATE)
850

    
851

    
852
def _CheckNodeVmCapable(lu, node):
853
  """Ensure that a given node is vm capable.
854

855
  @param lu: the LU on behalf of which we make the check
856
  @param node: the node to check
857
  @raise errors.OpPrereqError: if the node is not vm capable
858

859
  """
860
  if not lu.cfg.GetNodeInfo(node).vm_capable:
861
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
862
                               errors.ECODE_STATE)
863

    
864

    
865
def _CheckNodeHasOS(lu, node, os_name, force_variant):
866
  """Ensure that a node supports a given OS.
867

868
  @param lu: the LU on behalf of which we make the check
869
  @param node: the node to check
870
  @param os_name: the OS to query about
871
  @param force_variant: whether to ignore variant errors
872
  @raise errors.OpPrereqError: if the node does not support the OS
873

874
  """
875
  result = lu.rpc.call_os_get(node, os_name)
876
  result.Raise("OS '%s' not in supported OS list for node %s" %
877
               (os_name, node),
878
               prereq=True, ecode=errors.ECODE_INVAL)
879
  if not force_variant:
880
    _CheckOSVariant(result.payload, os_name)
881

    
882

    
883
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
884
  """Ensure that a node has the given secondary ip.
885

886
  @type lu: L{LogicalUnit}
887
  @param lu: the LU on behalf of which we make the check
888
  @type node: string
889
  @param node: the node to check
890
  @type secondary_ip: string
891
  @param secondary_ip: the ip to check
892
  @type prereq: boolean
893
  @param prereq: whether to throw a prerequisite or an execute error
894
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
895
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
896

897
  """
898
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
899
  result.Raise("Failure checking secondary ip on node %s" % node,
900
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
901
  if not result.payload:
902
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
903
           " please fix and re-run this command" % secondary_ip)
904
    if prereq:
905
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
906
    else:
907
      raise errors.OpExecError(msg)
908

    
909

    
910
def _GetClusterDomainSecret():
911
  """Reads the cluster domain secret.
912

913
  """
914
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
915
                               strict=True)
916

    
917

    
918
def _CheckInstanceDown(lu, instance, reason):
919
  """Ensure that an instance is not running."""
920
  if instance.admin_up:
921
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
922
                               (instance.name, reason), errors.ECODE_STATE)
923

    
924
  pnode = instance.primary_node
925
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
926
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
927
              prereq=True, ecode=errors.ECODE_ENVIRON)
928

    
929
  if instance.name in ins_l.payload:
930
    raise errors.OpPrereqError("Instance %s is running, %s" %
931
                               (instance.name, reason), errors.ECODE_STATE)
932

    
933

    
934
def _ExpandItemName(fn, name, kind):
935
  """Expand an item name.
936

937
  @param fn: the function to use for expansion
938
  @param name: requested item name
939
  @param kind: text description ('Node' or 'Instance')
940
  @return: the resolved (full) name
941
  @raise errors.OpPrereqError: if the item is not found
942

943
  """
944
  full_name = fn(name)
945
  if full_name is None:
946
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
947
                               errors.ECODE_NOENT)
948
  return full_name
949

    
950

    
951
def _ExpandNodeName(cfg, name):
952
  """Wrapper over L{_ExpandItemName} for nodes."""
953
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
954

    
955

    
956
def _ExpandInstanceName(cfg, name):
957
  """Wrapper over L{_ExpandItemName} for instance."""
958
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
959

    
960

    
961
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
962
                          memory, vcpus, nics, disk_template, disks,
963
                          bep, hvp, hypervisor_name, tags):
964
  """Builds instance related env variables for hooks
965

966
  This builds the hook environment from individual variables.
967

968
  @type name: string
969
  @param name: the name of the instance
970
  @type primary_node: string
971
  @param primary_node: the name of the instance's primary node
972
  @type secondary_nodes: list
973
  @param secondary_nodes: list of secondary nodes as strings
974
  @type os_type: string
975
  @param os_type: the name of the instance's OS
976
  @type status: boolean
977
  @param status: the should_run status of the instance
978
  @type memory: string
979
  @param memory: the memory size of the instance
980
  @type vcpus: string
981
  @param vcpus: the count of VCPUs the instance has
982
  @type nics: list
983
  @param nics: list of tuples (ip, mac, mode, link) representing
984
      the NICs the instance has
985
  @type disk_template: string
986
  @param disk_template: the disk template of the instance
987
  @type disks: list
988
  @param disks: the list of (size, mode) pairs
989
  @type bep: dict
990
  @param bep: the backend parameters for the instance
991
  @type hvp: dict
992
  @param hvp: the hypervisor parameters for the instance
993
  @type hypervisor_name: string
994
  @param hypervisor_name: the hypervisor for the instance
995
  @type tags: list
996
  @param tags: list of instance tags as strings
997
  @rtype: dict
998
  @return: the hook environment for this instance
999

1000
  """
1001
  if status:
1002
    str_status = "up"
1003
  else:
1004
    str_status = "down"
1005
  env = {
1006
    "OP_TARGET": name,
1007
    "INSTANCE_NAME": name,
1008
    "INSTANCE_PRIMARY": primary_node,
1009
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1010
    "INSTANCE_OS_TYPE": os_type,
1011
    "INSTANCE_STATUS": str_status,
1012
    "INSTANCE_MEMORY": memory,
1013
    "INSTANCE_VCPUS": vcpus,
1014
    "INSTANCE_DISK_TEMPLATE": disk_template,
1015
    "INSTANCE_HYPERVISOR": hypervisor_name,
1016
  }
1017

    
1018
  if nics:
1019
    nic_count = len(nics)
1020
    for idx, (ip, mac, mode, link) in enumerate(nics):
1021
      if ip is None:
1022
        ip = ""
1023
      env["INSTANCE_NIC%d_IP" % idx] = ip
1024
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1025
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1026
      env["INSTANCE_NIC%d_LINK" % idx] = link
1027
      if mode == constants.NIC_MODE_BRIDGED:
1028
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1029
  else:
1030
    nic_count = 0
1031

    
1032
  env["INSTANCE_NIC_COUNT"] = nic_count
1033

    
1034
  if disks:
1035
    disk_count = len(disks)
1036
    for idx, (size, mode) in enumerate(disks):
1037
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1038
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1039
  else:
1040
    disk_count = 0
1041

    
1042
  env["INSTANCE_DISK_COUNT"] = disk_count
1043

    
1044
  if not tags:
1045
    tags = []
1046

    
1047
  env["INSTANCE_TAGS"] = " ".join(tags)
1048

    
1049
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1050
    for key, value in source.items():
1051
      env["INSTANCE_%s_%s" % (kind, key)] = value
1052

    
1053
  return env
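# For reference, a minimal call produces an environment along these lines
# (values are illustrative):
#
#   {"OP_TARGET": "inst1.example.com", "INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com", "INSTANCE_STATUS": "up",
#    "INSTANCE_NIC_COUNT": 1, "INSTANCE_NIC0_MAC": "aa:00:00:13:94:1b",
#    "INSTANCE_DISK_COUNT": 1, "INSTANCE_DISK0_SIZE": 1024, ...}
#
# The hooks runner later prefixes every key with "GANETI_".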
1054

    
1055

    
1056
def _NICListToTuple(lu, nics):
1057
  """Build a list of nic information tuples.
1058

1059
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1060
  value in LUInstanceQueryData.
1061

1062
  @type lu:  L{LogicalUnit}
1063
  @param lu: the logical unit on whose behalf we execute
1064
  @type nics: list of L{objects.NIC}
1065
  @param nics: list of nics to convert to hooks tuples
1066

1067
  """
1068
  hooks_nics = []
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  for nic in nics:
1071
    ip = nic.ip
1072
    mac = nic.mac
1073
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1074
    mode = filled_params[constants.NIC_MODE]
1075
    link = filled_params[constants.NIC_LINK]
1076
    hooks_nics.append((ip, mac, mode, link))
1077
  return hooks_nics
1078

    
1079

    
1080
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1081
  """Builds instance related env variables for hooks from an object.
1082

1083
  @type lu: L{LogicalUnit}
1084
  @param lu: the logical unit on whose behalf we execute
1085
  @type instance: L{objects.Instance}
1086
  @param instance: the instance for which we should build the
1087
      environment
1088
  @type override: dict
1089
  @param override: dictionary with key/values that will override
1090
      our values
1091
  @rtype: dict
1092
  @return: the hook environment dictionary
1093

1094
  """
1095
  cluster = lu.cfg.GetClusterInfo()
1096
  bep = cluster.FillBE(instance)
1097
  hvp = cluster.FillHV(instance)
1098
  args = {
1099
    "name": instance.name,
1100
    "primary_node": instance.primary_node,
1101
    "secondary_nodes": instance.secondary_nodes,
1102
    "os_type": instance.os,
1103
    "status": instance.admin_up,
1104
    "memory": bep[constants.BE_MEMORY],
1105
    "vcpus": bep[constants.BE_VCPUS],
1106
    "nics": _NICListToTuple(lu, instance.nics),
1107
    "disk_template": instance.disk_template,
1108
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1109
    "bep": bep,
1110
    "hvp": hvp,
1111
    "hypervisor_name": instance.hypervisor,
1112
    "tags": instance.tags,
1113
  }
1114
  if override:
1115
    args.update(override)
1116
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1117

    
1118

    
1119
def _AdjustCandidatePool(lu, exceptions):
1120
  """Adjust the candidate pool after node operations.
1121

1122
  """
1123
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1124
  if mod_list:
1125
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1126
               utils.CommaJoin(node.name for node in mod_list))
1127
    for name in mod_list:
1128
      lu.context.ReaddNode(name)
1129
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1130
  if mc_now > mc_max:
1131
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1132
               (mc_now, mc_max))
1133

    
1134

    
1135
def _DecideSelfPromotion(lu, exceptions=None):
1136
  """Decide whether I should promote myself as a master candidate.
1137

1138
  """
1139
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1140
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1141
  # the new node will increase mc_max by one, so:
1142
  mc_should = min(mc_should + 1, cp_size)
1143
  return mc_now < mc_should
1144

    
1145

    
1146
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1147
  """Check that the brigdes needed by a list of nics exist.
1148

1149
  """
1150
  cluster = lu.cfg.GetClusterInfo()
1151
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1152
  brlist = [params[constants.NIC_LINK] for params in paramslist
1153
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1154
  if brlist:
1155
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1156
    result.Raise("Error checking bridges on destination node '%s'" %
1157
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1158

    
1159

    
1160
def _CheckInstanceBridgesExist(lu, instance, node=None):
1161
  """Check that the brigdes needed by an instance exist.
1162

1163
  """
1164
  if node is None:
1165
    node = instance.primary_node
1166
  _CheckNicsBridgesExist(lu, instance.nics, node)
1167

    
1168

    
1169
def _CheckOSVariant(os_obj, name):
1170
  """Check whether an OS name conforms to the os variants specification.
1171

1172
  @type os_obj: L{objects.OS}
1173
  @param os_obj: OS object to check
1174
  @type name: string
1175
  @param name: OS name passed by the user, to check for validity
1176

1177
  """
1178
  variant = objects.OS.GetVariant(name)
1179
  if not os_obj.supported_variants:
1180
    if variant:
1181
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1182
                                 " passed)" % (os_obj.name, variant),
1183
                                 errors.ECODE_INVAL)
1184
    return
1185
  if not variant:
1186
    raise errors.OpPrereqError("OS name must include a variant",
1187
                               errors.ECODE_INVAL)
1188

    
1189
  if variant not in os_obj.supported_variants:
1190
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1191

    
1192

    
1193
def _GetNodeInstancesInner(cfg, fn):
1194
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1195

    
1196

    
1197
def _GetNodeInstances(cfg, node_name):
1198
  """Returns a list of all primary and secondary instances on a node.
1199

1200
  """
1201

    
1202
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1203

    
1204

    
1205
def _GetNodePrimaryInstances(cfg, node_name):
1206
  """Returns primary instances on a node.
1207

1208
  """
1209
  return _GetNodeInstancesInner(cfg,
1210
                                lambda inst: node_name == inst.primary_node)
1211

    
1212

    
1213
def _GetNodeSecondaryInstances(cfg, node_name):
1214
  """Returns secondary instances on a node.
1215

1216
  """
1217
  return _GetNodeInstancesInner(cfg,
1218
                                lambda inst: node_name in inst.secondary_nodes)
1219

    
1220

    
1221
def _GetStorageTypeArgs(cfg, storage_type):
1222
  """Returns the arguments for a storage type.
1223

1224
  """
1225
  # Special case for file storage
1226
  if storage_type == constants.ST_FILE:
1227
    # storage.FileStorage wants a list of storage directories
1228
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1229

    
1230
  return []
1231

    
1232

    
1233
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1234
  faulty = []
1235

    
1236
  for dev in instance.disks:
1237
    cfg.SetDiskID(dev, node_name)
1238

    
1239
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1240
  result.Raise("Failed to get disk status from node %s" % node_name,
1241
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1242

    
1243
  for idx, bdev_status in enumerate(result.payload):
1244
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1245
      faulty.append(idx)
1246

    
1247
  return faulty
1248

    
1249

    
1250
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1251
  """Check the sanity of iallocator and node arguments and use the
1252
  cluster-wide iallocator if appropriate.
1253

1254
  Check that at most one of (iallocator, node) is specified. If none is
1255
  specified, then the LU's opcode's iallocator slot is filled with the
1256
  cluster-wide default iallocator.
1257

1258
  @type iallocator_slot: string
1259
  @param iallocator_slot: the name of the opcode iallocator slot
1260
  @type node_slot: string
1261
  @param node_slot: the name of the opcode target node slot
1262

1263
  """
1264
  node = getattr(lu.op, node_slot, None)
1265
  iallocator = getattr(lu.op, iallocator_slot, None)
1266

    
1267
  if node is not None and iallocator is not None:
1268
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1269
                               errors.ECODE_INVAL)
1270
  elif node is None and iallocator is None:
1271
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1272
    if default_iallocator:
1273
      setattr(lu.op, iallocator_slot, default_iallocator)
1274
    else:
1275
      raise errors.OpPrereqError("No iallocator or node given and no"
1276
                                 " cluster-wide default iallocator found;"
1277
                                 " please specify either an iallocator or a"
1278
                                 " node, or set a cluster-wide default"
1279
                                 " iallocator")
1280

    
1281

    
1282
def _GetDefaultIAllocator(cfg, iallocator):
1283
  """Decides on which iallocator to use.
1284

1285
  @type cfg: L{config.ConfigWriter}
1286
  @param cfg: Cluster configuration object
1287
  @type iallocator: string or None
1288
  @param iallocator: Iallocator specified in opcode
1289
  @rtype: string
1290
  @return: Iallocator name
1291

1292
  """
1293
  if not iallocator:
1294
    # Use default iallocator
1295
    iallocator = cfg.GetDefaultIAllocator()
1296

    
1297
  if not iallocator:
1298
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1299
                               " opcode nor as a cluster-wide default",
1300
                               errors.ECODE_INVAL)
1301

    
1302
  return iallocator
1303

    
1304

    
1305
class LUClusterPostInit(LogicalUnit):
1306
  """Logical unit for running hooks after cluster initialization.
1307

1308
  """
1309
  HPATH = "cluster-init"
1310
  HTYPE = constants.HTYPE_CLUSTER
1311

    
1312
  def BuildHooksEnv(self):
1313
    """Build hooks env.
1314

1315
    """
1316
    return {
1317
      "OP_TARGET": self.cfg.GetClusterName(),
1318
      }
1319

    
1320
  def BuildHooksNodes(self):
1321
    """Build hooks nodes.
1322

1323
    """
1324
    return ([], [self.cfg.GetMasterNode()])
1325

    
1326
  def Exec(self, feedback_fn):
1327
    """Nothing to do.
1328

1329
    """
1330
    return True
1331

    
1332

    
1333
class LUClusterDestroy(LogicalUnit):
1334
  """Logical unit for destroying the cluster.
1335

1336
  """
1337
  HPATH = "cluster-destroy"
1338
  HTYPE = constants.HTYPE_CLUSTER
1339

    
1340
  def BuildHooksEnv(self):
1341
    """Build hooks env.
1342

1343
    """
1344
    return {
1345
      "OP_TARGET": self.cfg.GetClusterName(),
1346
      }
1347

    
1348
  def BuildHooksNodes(self):
1349
    """Build hooks nodes.
1350

1351
    """
1352
    return ([], [])
1353

    
1354
  def CheckPrereq(self):
1355
    """Check prerequisites.
1356

1357
    This checks whether the cluster is empty.
1358

1359
    Any errors are signaled by raising errors.OpPrereqError.
1360

1361
    """
1362
    master = self.cfg.GetMasterNode()
1363

    
1364
    nodelist = self.cfg.GetNodeList()
1365
    if len(nodelist) != 1 or nodelist[0] != master:
1366
      raise errors.OpPrereqError("There are still %d node(s) in"
1367
                                 " this cluster." % (len(nodelist) - 1),
1368
                                 errors.ECODE_INVAL)
1369
    instancelist = self.cfg.GetInstanceList()
1370
    if instancelist:
1371
      raise errors.OpPrereqError("There are still %d instance(s) in"
1372
                                 " this cluster." % len(instancelist),
1373
                                 errors.ECODE_INVAL)
1374

    
1375
  def Exec(self, feedback_fn):
1376
    """Destroys the cluster.
1377

1378
    """
1379
    master = self.cfg.GetMasterNode()
1380

    
1381
    # Run post hooks on master node before it's removed
1382
    _RunPostHook(self, master)
1383

    
1384
    result = self.rpc.call_node_deactivate_master_ip(master)
1385
    result.Raise("Could not disable the master role")
1386

    
1387
    return master
1388

    
1389

    
1390
def _VerifyCertificate(filename):
1391
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1392

1393
  @type filename: string
1394
  @param filename: Path to PEM file
1395

1396
  """
1397
  try:
1398
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1399
                                           utils.ReadFile(filename))
1400
  except Exception, err: # pylint: disable=W0703
1401
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1402
            "Failed to load X509 certificate %s: %s" % (filename, err))
1403

    
1404
  (errcode, msg) = \
1405
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1406
                                constants.SSL_CERT_EXPIRATION_ERROR)
1407

    
1408
  if msg:
1409
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1410
  else:
1411
    fnamemsg = None
1412

    
1413
  if errcode is None:
1414
    return (None, fnamemsg)
1415
  elif errcode == utils.CERT_WARNING:
1416
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1417
  elif errcode == utils.CERT_ERROR:
1418
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1419

    
1420
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1421

    
1422

    
1423
def _GetAllHypervisorParameters(cluster, instances):
1424
  """Compute the set of all hypervisor parameters.
1425

1426
  @type cluster: L{objects.Cluster}
1427
  @param cluster: the cluster object
1428
  @type instances: list of L{objects.Instance}
1429
  @param instances: additional instances from which to obtain parameters
1430
  @rtype: list of (origin, hypervisor, parameters)
1431
  @return: a list with all parameters found, indicating the hypervisor they
1432
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1433

1434
  """
1435
  hvp_data = []
1436

    
1437
  for hv_name in cluster.enabled_hypervisors:
1438
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1439

    
1440
  for os_name, os_hvp in cluster.os_hvp.items():
1441
    for hv_name, hv_params in os_hvp.items():
1442
      if hv_params:
1443
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1444
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1445

    
1446
  # TODO: collapse identical parameter values in a single one
1447
  for instance in instances:
1448
    if instance.hvparams:
1449
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1450
                       cluster.FillHV(instance)))
1451

    
1452
  return hvp_data
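# Shape of the returned list (hypervisor names and parameter dicts are
# illustrative):
#
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-image", "xen-pvm", {...}),
#    ("instance inst1.example.com", "kvm", {...})]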
1453

    
1454

    
1455
class _VerifyErrors(object):
1456
  """Mix-in for cluster/group verify LUs.
1457

1458
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1459
  self.op and self._feedback_fn to be available.)
1460

1461
  """
1462
  TCLUSTER = "cluster"
1463
  TNODE = "node"
1464
  TINSTANCE = "instance"
1465

    
1466
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1467
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1468
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1469
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1470
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1471
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1472
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1473
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1474
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1475
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1476
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1477
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1478
  ENODEDRBD = (TNODE, "ENODEDRBD")
1479
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1480
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1481
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1482
  ENODEHV = (TNODE, "ENODEHV")
1483
  ENODELVM = (TNODE, "ENODELVM")
1484
  ENODEN1 = (TNODE, "ENODEN1")
1485
  ENODENET = (TNODE, "ENODENET")
1486
  ENODEOS = (TNODE, "ENODEOS")
1487
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1488
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1489
  ENODERPC = (TNODE, "ENODERPC")
1490
  ENODESSH = (TNODE, "ENODESSH")
1491
  ENODEVERSION = (TNODE, "ENODEVERSION")
1492
  ENODESETUP = (TNODE, "ENODESETUP")
1493
  ENODETIME = (TNODE, "ENODETIME")
1494
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1495

    
1496
  ETYPE_FIELD = "code"
1497
  ETYPE_ERROR = "ERROR"
1498
  ETYPE_WARNING = "WARNING"
1499

    
1500
  def _Error(self, ecode, item, msg, *args, **kwargs):
1501
    """Format an error message.
1502

1503
    Based on the opcode's error_codes parameter, either format a
1504
    parseable error code, or a simpler error string.
1505

1506
    This must be called only from Exec and functions called from Exec.
1507

1508
    """
1509
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1510
    itype, etxt = ecode
1511
    # first complete the msg
1512
    if args:
1513
      msg = msg % args
1514
    # then format the whole message
1515
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1516
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1517
    else:
1518
      if item:
1519
        item = " " + item
1520
      else:
1521
        item = ""
1522
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1523
    # and finally report it via the feedback_fn
1524
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1525

    
1526
  def _ErrorIf(self, cond, *args, **kwargs):
1527
    """Log an error message if the passed condition is True.
1528

1529
    """
1530
    cond = (bool(cond)
1531
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1532
    if cond:
1533
      self._Error(*args, **kwargs)
1534
    # do not mark the operation as failed for WARN cases only
1535
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1536
      self.bad = self.bad or cond
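  # Usage sketch from a verify LU (the condition and message are illustrative;
  # the local alias mirrors how callers typically shorten the name):
  #
  #   _ErrorIf = self._ErrorIf # pylint: disable=C0103
  #   _ErrorIf(test, self.ENODEHV, node, "hypervisor verify failure: '%s'", msg)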
1537

    
1538

    
1539
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([opcodes.OpClusterVerifyConfig()])

      # Always depend on global verification
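      # (negative job IDs in "depends" are relative to this job submission,
      # so -len(jobs) below refers to the OpClusterVerifyConfig job queued
      # just above)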
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = False

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    self.needed_locks = dict.fromkeys(locking.LEVELS, locking.ALL_SET)
    self.share_locks = _ShareAll()
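    # all lock levels are acquired in shared mode: this LU only reads the
    # configuration and does not modify any cluster state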

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Retrieve all information
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = \
      self.cfg.GetNodeGroupInstances(self.group_uuid, primary_only=True)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes),
                                 errors.ECODE_STATE)

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances),
                                 errors.ECODE_STATE)

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        for nname in inst.all_nodes:
          if self.all_node_info[nname].group != self.group_uuid:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes),
                                 errors.ECODE_STATE)
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

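    # the acceptable window is [nvinfo_starttime - NODE_MAX_CLOCK_SKEW,
    # nvinfo_endtime + NODE_MAX_CLOCK_SKEW]; compute how far outside it
    # the reported node time falls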
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1928
    """Check the node LVM results.
1929

1930
    @type ninfo: L{objects.Node}
1931
    @param ninfo: the node to check
1932
    @param nresult: the remote results for the node
1933
    @param vg_name: the configured VG name
1934

1935
    """
1936
    if vg_name is None:
1937
      return
1938

    
1939
    node = ninfo.name
1940
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1941

    
1942
    # checks vg existence and size > 20G
1943
    vglist = nresult.get(constants.NV_VGLIST, None)
1944
    test = not vglist
1945
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1946
    if not test:
1947
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1948
                                            constants.MIN_VG_SIZE)
1949
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1950

    
1951
    # check pv names
1952
    pvlist = nresult.get(constants.NV_PVLIST, None)
1953
    test = pvlist is None
1954
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1955
    if not test:
1956
      # check that ':' is not present in PV names, since it's a
1957
      # special character for lvcreate (denotes the range of PEs to
1958
      # use on the PV)
1959
      for _, pvname, owner_vg in pvlist:
1960
        test = ":" in pvname
1961
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1962
                 " '%s' of VG '%s'", pvname, owner_vg)
1963

    
1964
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1965
    """Check the node bridges.
1966

1967
    @type ninfo: L{objects.Node}
1968
    @param ninfo: the node to check
1969
    @param nresult: the remote results for the node
1970
    @param bridges: the expected list of bridges
1971

1972
    """
1973
    if not bridges:
1974
      return
1975

    
1976
    node = ninfo.name
1977
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1978

    
1979
    missing = nresult.get(constants.NV_BRIDGES, None)
1980
    test = not isinstance(missing, list)
1981
    _ErrorIf(test, self.ENODENET, node,
1982
             "did not return valid bridge information")
1983
    if not test:
1984
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1985
               utils.CommaJoin(sorted(missing)))
1986

    
1987
  def _VerifyNodeNetwork(self, ninfo, nresult):
1988
    """Check the node network connectivity results.
1989

1990
    @type ninfo: L{objects.Node}
1991
    @param ninfo: the node to check
1992
    @param nresult: the remote results for the node
1993

1994
    """
1995
    node = ninfo.name
1996
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1997

    
1998
    test = constants.NV_NODELIST not in nresult
1999
    _ErrorIf(test, self.ENODESSH, node,
2000
             "node hasn't returned node ssh connectivity data")
2001
    if not test:
2002
      if nresult[constants.NV_NODELIST]:
2003
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2004
          _ErrorIf(True, self.ENODESSH, node,
2005
                   "ssh communication with node '%s': %s", a_node, a_msg)
2006

    
2007
    test = constants.NV_NODENETTEST not in nresult
2008
    _ErrorIf(test, self.ENODENET, node,
2009
             "node hasn't returned node tcp connectivity data")
2010
    if not test:
2011
      if nresult[constants.NV_NODENETTEST]:
2012
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2013
        for anode in nlist:
2014
          _ErrorIf(True, self.ENODENET, node,
2015
                   "tcp communication with node '%s': %s",
2016
                   anode, nresult[constants.NV_NODENETTEST][anode])
2017

    
2018
    test = constants.NV_MASTERIP not in nresult
2019
    _ErrorIf(test, self.ENODENET, node,
2020
             "node hasn't returned node master IP reachability data")
2021
    if not test:
2022
      if not nresult[constants.NV_MASTERIP]:
2023
        if node == self.master_node:
2024
          msg = "the master node cannot reach the master IP (not configured?)"
2025
        else:
2026
          msg = "cannot reach the master IP"
2027
        _ErrorIf(True, self.ENODENET, node, msg)
2028

    
2029
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2030
                      diskstatus):
2031
    """Verify an instance.
2032

2033
    This function checks to see if the required block devices are
2034
    available on the instance's node.
2035

2036
    """
2037
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2038
    node_current = instanceconfig.primary_node
2039

    
2040
    node_vol_should = {}
2041
    instanceconfig.MapLVsByNode(node_vol_should)
2042

    
2043
    for node in node_vol_should:
2044
      n_img = node_image[node]
2045
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2046
        # ignore missing volumes on offline or broken nodes
2047
        continue
2048
      for volume in node_vol_should[node]:
2049
        test = volume not in n_img.volumes
2050
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
2051
                 "volume %s missing on node %s", volume, node)
2052

    
2053
    if instanceconfig.admin_up:
2054
      pri_img = node_image[node_current]
2055
      test = instance not in pri_img.instances and not pri_img.offline
2056
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
2057
               "instance not running on its primary node %s",
2058
               node_current)
2059

    
2060
    diskdata = [(nname, success, status, idx)
2061
                for (nname, disks) in diskstatus.items()
2062
                for idx, (success, status) in enumerate(disks)]
2063

    
2064
    for nname, success, bdev_status, idx in diskdata:
2065
      # the 'ghost node' construction in Exec() ensures that we have a
2066
      # node here
2067
      snode = node_image[nname]
2068
      bad_snode = snode.ghost or snode.offline
2069
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2070
               self.EINSTANCEFAULTYDISK, instance,
2071
               "couldn't retrieve status for disk/%s on %s: %s",
2072
               idx, nname, bdev_status)
2073
      _ErrorIf((instanceconfig.admin_up and success and
2074
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2075
               self.EINSTANCEFAULTYDISK, instance,
2076
               "disk/%s on %s is faulty", idx, nname)
2077

    
2078
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2079
    """Verify if there are any unknown volumes in the cluster.
2080

2081
    The .os, .swap and backup volumes are ignored. All other volumes are
2082
    reported as unknown.
2083

2084
    @type reserved: L{ganeti.utils.FieldSet}
2085
    @param reserved: a FieldSet of reserved volume names
2086

2087
    """
2088
    for node, n_img in node_image.items():
2089
      if (n_img.offline or n_img.rpc_fail or n_img.lvm_fail or
2090
          self.all_node_info[node].group != self.group_uuid):
2091
        # skip non-healthy nodes
2092
        continue
2093
      for volume in n_img.volumes:
2094
        test = ((node not in node_vol_should or
2095
                volume not in node_vol_should[node]) and
2096
                not reserved.Matches(volume))
2097
        self._ErrorIf(test, self.ENODEORPHANLV, node,
2098
                      "volume %s is unknown", volume)
2099

    
2100
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2101
    """Verify N+1 Memory Resilience.
2102

2103
    Check that if one single node dies we can still start all the
2104
    instances it was primary for.
2105

2106
    """
2107
    cluster_info = self.cfg.GetClusterInfo()
2108
    for node, n_img in node_image.items():
2109
      # This code checks that every node which is now listed as
2110
      # secondary has enough memory to host all instances it is
2111
      # supposed to should a single other node in the cluster fail.
2112
      # FIXME: not ready for failover to an arbitrary node
2113
      # FIXME: does not support file-backed instances
2114
      # WARNING: we currently take into account down instances as well
2115
      # as up ones, considering that even if they're down someone
2116
      # might want to start them even in the event of a node failure.
2117
      if n_img.offline or self.all_node_info[node].group != self.group_uuid:
2118
        # we're skipping nodes marked offline and nodes in other groups from
2119
        # the N+1 warning, since most likely we don't have good memory
2120
        # information from them; we already list instances living on such
2121
        # nodes, and that's enough warning
2122
        continue
2123
      for prinode, instances in n_img.sbp.items():
2124
        needed_mem = 0
2125
        for instance in instances:
2126
          bep = cluster_info.FillBE(instance_cfg[instance])
2127
          if bep[constants.BE_AUTO_BALANCE]:
2128
            needed_mem += bep[constants.BE_MEMORY]
2129
        test = n_img.mfree < needed_mem
2130
        self._ErrorIf(test, self.ENODEN1, node,
2131
                      "not enough memory to accomodate instance failovers"
2132
                      " should node %s fail (%dMiB needed, %dMiB available)",
2133
                      prinode, needed_mem, n_img.mfree)
2134

    
2135
  @classmethod
2136
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2137
                   (files_all, files_opt, files_mc, files_vm)):
2138
    """Verifies file checksums collected from all nodes.
2139

2140
    @param errorif: Callback for reporting errors
2141
    @param nodeinfo: List of L{objects.Node} objects
2142
    @param master_node: Name of master node
2143
    @param all_nvinfo: RPC results
2144

2145
    """
2146
    # Define functions determining which nodes to consider for a file
2147
    files2nodefn = [
2148
      (files_all, None),
2149
      (files_mc, lambda node: (node.master_candidate or
2150
                               node.name == master_node)),
2151
      (files_vm, lambda node: node.vm_capable),
2152
      ]
2153

    
2154
    # Build mapping from filename to list of nodes which should have the file
2155
    nodefiles = {}
2156
    for (files, fn) in files2nodefn:
2157
      if fn is None:
2158
        filenodes = nodeinfo
2159
      else:
2160
        filenodes = filter(fn, nodeinfo)
2161
      nodefiles.update((filename,
2162
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2163
                       for filename in files)
2164

    
2165
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2166

    
2167
    fileinfo = dict((filename, {}) for filename in nodefiles)
2168
    ignore_nodes = set()
2169

    
2170
    for node in nodeinfo:
2171
      if node.offline:
2172
        ignore_nodes.add(node.name)
2173
        continue
2174

    
2175
      nresult = all_nvinfo[node.name]
2176

    
2177
      if nresult.fail_msg or not nresult.payload:
2178
        node_files = None
2179
      else:
2180
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2181

    
2182
      test = not (node_files and isinstance(node_files, dict))
2183
      errorif(test, cls.ENODEFILECHECK, node.name,
2184
              "Node did not return file checksum data")
2185
      if test:
2186
        ignore_nodes.add(node.name)
2187
        continue
2188

    
2189
      # Build per-checksum mapping from filename to nodes having it
2190
      for (filename, checksum) in node_files.items():
2191
        assert filename in nodefiles
2192
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2193

    
2194
    for (filename, checksums) in fileinfo.items():
2195
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2196

    
2197
      # Nodes having the file
2198
      with_file = frozenset(node_name
2199
                            for nodes in fileinfo[filename].values()
2200
                            for node_name in nodes) - ignore_nodes
2201

    
2202
      expected_nodes = nodefiles[filename] - ignore_nodes
2203

    
2204
      # Nodes missing file
2205
      missing_file = expected_nodes - with_file
2206

    
2207
      if filename in files_opt:
2208
        # All or no nodes
2209
        errorif(missing_file and missing_file != expected_nodes,
2210
                cls.ECLUSTERFILECHECK, None,
2211
                "File %s is optional, but it must exist on all or no"
2212
                " nodes (not found on %s)",
2213
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2214
      else:
2215
        # Non-optional files
2216
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2217
                "File %s is missing from node(s) %s", filename,
2218
                utils.CommaJoin(utils.NiceSort(missing_file)))
2219

    
2220
        # Warn if a node has a file it shouldn't
2221
        unexpected = with_file - expected_nodes
2222
        errorif(unexpected,
2223
                cls.ECLUSTERFILECHECK, None,
2224
                "File %s should not exist on node(s) %s",
2225
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2226

    
2227
      # See if there are multiple versions of the file
2228
      test = len(checksums) > 1
2229
      if test:
2230
        variants = ["variant %s on %s" %
2231
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2232
                    for (idx, (checksum, nodes)) in
2233
                      enumerate(sorted(checksums.items()))]
2234
      else:
2235
        variants = []
2236

    
2237
      errorif(test, cls.ECLUSTERFILECHECK, None,
2238
              "File %s found with %s different checksums (%s)",
2239
              filename, len(checksums), "; ".join(variants))
2240

    
2241
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2242
                      drbd_map):
2243
    """Verifies and the node DRBD status.
2244

2245
    @type ninfo: L{objects.Node}
2246
    @param ninfo: the node to check
2247
    @param nresult: the remote results for the node
2248
    @param instanceinfo: the dict of instances
2249
    @param drbd_helper: the configured DRBD usermode helper
2250
    @param drbd_map: the DRBD map as returned by
2251
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2252

2253
    """
2254
    node = ninfo.name
2255
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2256

    
2257
    if drbd_helper:
2258
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2259
      test = (helper_result is None)
2260
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
2261
               "no drbd usermode helper returned")
2262
      if helper_result:
2263
        status, payload = helper_result
2264
        test = not status
2265
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2266
                 "drbd usermode helper check unsuccessful: %s", payload)
2267
        test = status and (payload != drbd_helper)
2268
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
2269
                 "wrong drbd usermode helper: %s", payload)
2270

    
2271
    # compute the DRBD minors
2272
    node_drbd = {}
2273
    for minor, instance in drbd_map[node].items():
2274
      test = instance not in instanceinfo
2275
      _ErrorIf(test, self.ECLUSTERCFG, None,
2276
               "ghost instance '%s' in temporary DRBD map", instance)
2277
        # ghost instance should not be running, but otherwise we
2278
        # don't give double warnings (both ghost instance and
2279
        # unallocated minor in use)
2280
      if test:
2281
        node_drbd[minor] = (instance, False)
2282
      else:
2283
        instance = instanceinfo[instance]
2284
        node_drbd[minor] = (instance.name, instance.admin_up)
2285

    
2286
    # and now check them
2287
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2288
    test = not isinstance(used_minors, (tuple, list))
2289
    _ErrorIf(test, self.ENODEDRBD, node,
2290
             "cannot parse drbd status file: %s", str(used_minors))
2291
    if test:
2292
      # we cannot check drbd status
2293
      return
2294

    
2295
    for minor, (iname, must_exist) in node_drbd.items():
2296
      test = minor not in used_minors and must_exist
2297
      _ErrorIf(test, self.ENODEDRBD, node,
2298
               "drbd minor %d of instance %s is not active", minor, iname)
2299
    for minor in used_minors:
2300
      test = minor not in node_drbd
2301
      _ErrorIf(test, self.ENODEDRBD, node,
2302
               "unallocated drbd minor %d is in use", minor)
2303

    
2304
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2305
    """Builds the node OS structures.
2306

2307
    @type ninfo: L{objects.Node}
2308
    @param ninfo: the node to check
2309
    @param nresult: the remote results for the node
2310
    @param nimg: the node image object
2311

2312
    """
2313
    node = ninfo.name
2314
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2315

    
2316
    remote_os = nresult.get(constants.NV_OSLIST, None)
2317
    test = (not isinstance(remote_os, list) or
2318
            not compat.all(isinstance(v, list) and len(v) == 7
2319
                           for v in remote_os))
2320

    
2321
    _ErrorIf(test, self.ENODEOS, node,
2322
             "node hasn't returned valid OS data")
2323

    
2324
    nimg.os_fail = test
2325

    
2326
    if test:
2327
      return
2328

    
2329
    os_dict = {}
2330

    
2331
    for (name, os_path, status, diagnose,
2332
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2333

    
2334
      if name not in os_dict:
2335
        os_dict[name] = []
2336

    
2337
      # parameters is a list of lists instead of list of tuples due to
2338
      # JSON lacking a real tuple type, fix it:
2339
      parameters = [tuple(v) for v in parameters]
2340
      os_dict[name].append((os_path, status, diagnose,
2341
                            set(variants), set(parameters), set(api_ver)))
2342

    
2343
    nimg.oslist = os_dict
2344

    
2345
  def _VerifyNodeOS(self, ninfo, nimg, base):
2346
    """Verifies the node OS list.
2347

2348
    @type ninfo: L{objects.Node}
2349
    @param ninfo: the node to check
2350
    @param nimg: the node image object
2351
    @param base: the 'template' node we match against (e.g. from the master)
2352

2353
    """
2354
    node = ninfo.name
2355
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2356

    
2357
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2358

    
2359
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2360
    for os_name, os_data in nimg.oslist.items():
2361
      assert os_data, "Empty OS status for OS %s?!" % os_name
2362
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2363
      _ErrorIf(not f_status, self.ENODEOS, node,
2364
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2365
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2366
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2367
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2368
      # comparisons with the 'base' image
2369
      test = os_name not in base.oslist
2370
      _ErrorIf(test, self.ENODEOS, node,
2371
               "Extra OS %s not present on reference node (%s)",
2372
               os_name, base.name)
2373
      if test:
2374
        continue
2375
      assert base.oslist[os_name], "Base node has empty OS status?"
2376
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2377
      if not b_status:
2378
        # base OS is invalid, skipping
2379
        continue
2380
      for kind, a, b in [("API version", f_api, b_api),
2381
                         ("variants list", f_var, b_var),
2382
                         ("parameters", beautify_params(f_param),
2383
                          beautify_params(b_param))]:
2384
        _ErrorIf(a != b, self.ENODEOS, node,
2385
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2386
                 kind, os_name, base.name,
2387
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2388

    
2389
    # check any missing OSes
2390
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2391
    _ErrorIf(missing, self.ENODEOS, node,
2392
             "OSes present on reference node %s but missing on this node: %s",
2393
             base.name, utils.CommaJoin(missing))
2394

    
2395
  def _VerifyOob(self, ninfo, nresult):
2396
    """Verifies out of band functionality of a node.
2397

2398
    @type ninfo: L{objects.Node}
2399
    @param ninfo: the node to check
2400
    @param nresult: the remote results for the node
2401

2402
    """
2403
    node = ninfo.name
2404
    # We just have to verify the paths on master and/or master candidates
2405
    # as the oob helper is invoked on the master
2406
    if ((ninfo.master_candidate or ninfo.master_capable) and
2407
        constants.NV_OOB_PATHS in nresult):
2408
      for path_result in nresult[constants.NV_OOB_PATHS]:
2409
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2410

    
2411
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2412
    """Verifies and updates the node volume data.
2413

2414
    This function will update a L{NodeImage}'s internal structures
2415
    with data from the remote call.
2416

2417
    @type ninfo: L{objects.Node}
2418
    @param ninfo: the node to check
2419
    @param nresult: the remote results for the node
2420
    @param nimg: the node image object
2421
    @param vg_name: the configured VG name
2422

2423
    """
2424
    node = ninfo.name
2425
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2426

    
2427
    nimg.lvm_fail = True
2428
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2429
    if vg_name is None:
2430
      pass
2431
    elif isinstance(lvdata, basestring):
2432
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2433
               utils.SafeEncode(lvdata))
2434
    elif not isinstance(lvdata, dict):
2435
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2436
    else:
2437
      nimg.volumes = lvdata
2438
      nimg.lvm_fail = False
2439

    
2440
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2441
    """Verifies and updates the node instance list.
2442

2443
    If the listing was successful, then updates this node's instance
2444
    list. Otherwise, it marks the RPC call as failed for the instance
2445
    list key.
2446

2447
    @type ninfo: L{objects.Node}
2448
    @param ninfo: the node to check
2449
    @param nresult: the remote results for the node
2450
    @param nimg: the node image object
2451

2452
    """
2453
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2454
    test = not isinstance(idata, list)
2455
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2456
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
2457
    if test:
2458
      nimg.hyp_fail = True
2459
    else:
2460
      nimg.instances = idata
2461

    
2462
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2463
    """Verifies and computes a node information map
2464

2465
    @type ninfo: L{objects.Node}
2466
    @param ninfo: the node to check
2467
    @param nresult: the remote results for the node
2468
    @param nimg: the node image object
2469
    @param vg_name: the configured VG name
2470

2471
    """
2472
    node = ninfo.name
2473
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2474

    
2475
    # try to read free memory (from the hypervisor)
2476
    hv_info = nresult.get(constants.NV_HVINFO, None)
2477
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2478
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2479
    if not test:
2480
      try:
2481
        nimg.mfree = int(hv_info["memory_free"])
2482
      except (ValueError, TypeError):
2483
        _ErrorIf(True, self.ENODERPC, node,
2484
                 "node returned invalid nodeinfo, check hypervisor")
2485

    
2486
    # FIXME: devise a free space model for file based instances as well
2487
    if vg_name is not None:
2488
      test = (constants.NV_VGLIST not in nresult or
2489
              vg_name not in nresult[constants.NV_VGLIST])
2490
      _ErrorIf(test, self.ENODELVM, node,
2491
               "node didn't return data for the volume group '%s'"
2492
               " - it is either missing or broken", vg_name)
2493
      if not test:
2494
        try:
2495
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2496
        except (ValueError, TypeError):
2497
          _ErrorIf(True, self.ENODERPC, node,
2498
                   "node returned invalid LVM info, check LVM status")
2499

    
2500
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2501
    """Gets per-disk status information for all instances.
2502

2503
    @type nodelist: list of strings
2504
    @param nodelist: Node names
2505
    @type node_image: dict of (name, L{objects.Node})
2506
    @param node_image: Node objects
2507
    @type instanceinfo: dict of (name, L{objects.Instance})
2508
    @param instanceinfo: Instance objects
2509
    @rtype: {instance: {node: [(success, payload)]}}
2510
    @return: a dictionary of per-instance dictionaries with nodes as
2511
        keys and disk information as values; the disk information is a
2512
        list of tuples (success, payload)
2513

2514
    """
2515
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2516

    
2517
    node_disks = {}
2518
    node_disks_devonly = {}
2519
    diskless_instances = set()
2520
    diskless = constants.DT_DISKLESS
2521

    
2522
    for nname in nodelist:
2523
      node_instances = list(itertools.chain(node_image[nname].pinst,
2524
                                            node_image[nname].sinst))
2525
      diskless_instances.update(inst for inst in node_instances
2526
                                if instanceinfo[inst].disk_template == diskless)
2527
      disks = [(inst, disk)
2528
               for inst in node_instances
2529
               for disk in instanceinfo[inst].disks]
2530

    
2531
      if not disks:
2532
        # No need to collect data
2533
        continue
2534

    
2535
      node_disks[nname] = disks
2536

    
2537
      # Creating copies as SetDiskID below will modify the objects and that can
2538
      # lead to incorrect data returned from nodes
2539
      devonly = [dev.Copy() for (_, dev) in disks]
2540

    
2541
      for dev in devonly:
2542
        self.cfg.SetDiskID(dev, nname)
2543

    
2544
      node_disks_devonly[nname] = devonly
2545

    
2546
    assert len(node_disks) == len(node_disks_devonly)
2547

    
2548
    # Collect data from all nodes with disks
2549
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2550
                                                          node_disks_devonly)
2551

    
2552
    assert len(result) == len(node_disks)
2553

    
2554
    instdisk = {}
2555

    
2556
    for (nname, nres) in result.items():
2557
      disks = node_disks[nname]
2558

    
2559
      if nres.offline:
2560
        # No data from this node
2561
        data = len(disks) * [(False, "node offline")]
2562
      else:
2563
        msg = nres.fail_msg
2564
        _ErrorIf(msg, self.ENODERPC, nname,
2565
                 "while getting disk information: %s", msg)
2566
        if msg:
2567
          # No data from this node
2568
          data = len(disks) * [(False, msg)]
2569
        else:
2570
          data = []
2571
          for idx, i in enumerate(nres.payload):
2572
            if isinstance(i, (tuple, list)) and len(i) == 2:
2573
              data.append(i)
2574
            else:
2575
              logging.warning("Invalid result from node %s, entry %d: %s",
2576
                              nname, idx, i)
2577
              data.append((False, "Invalid result from the remote node"))
2578

    
2579
      for ((inst, _), status) in zip(disks, data):
2580
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2581

    
2582
    # Add empty entries for diskless instances.
2583
    for inst in diskless_instances:
2584
      assert inst not in instdisk
2585
      instdisk[inst] = {}
2586

    
2587
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2588
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2589
                      compat.all(isinstance(s, (tuple, list)) and
2590
                                 len(s) == 2 for s in statuses)
2591
                      for inst, nnames in instdisk.items()
2592
                      for nname, statuses in nnames.items())
2593
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2594

    
2595
    return instdisk
2596

    
2597
  @staticmethod
2598
  def _SshNodeSelector(group_uuid, all_nodes):
2599
    """Create endless iterators for all potential SSH check hosts.
2600

2601
    """
2602
    nodes = [node for node in all_nodes
2603
             if (node.group != group_uuid and
2604
                 not node.offline)]
2605
    keyfunc = operator.attrgetter("group")
2606

    
2607
    return map(itertools.cycle,
2608
               [sorted(map(operator.attrgetter("name"), names))
2609
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2610
                                                  keyfunc)])
2611

    
2612
  @classmethod
2613
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2614
    """Choose which nodes should talk to which other nodes.
2615

2616
    We will make nodes contact all nodes in their group, and one node from
2617
    every other group.
2618

2619
    @warning: This algorithm has a known issue if one node group is much
2620
      smaller than others (e.g. just one node). In such a case all other
2621
      nodes will talk to the single node.
2622

2623
    """
2624
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2625
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2626

    
2627
    return (online_nodes,
2628
            dict((name, sorted([i.next() for i in sel]))
2629
                 for name in online_nodes))
2630

    
2631
  def BuildHooksEnv(self):
2632
    """Build hooks env.
2633

2634
    Cluster-Verify hooks just ran in the post phase and their failure makes
2635
    the output be logged in the verify output and the verification to fail.
2636

2637
    """
2638
    env = {
2639
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2640
      }
2641

    
2642
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2643
               for node in self.my_node_info.values())
2644

    
2645
    return env
2646

    
2647
  def BuildHooksNodes(self):
2648
    """Build hooks nodes.
2649

2650
    """
2651
    return ([], self.my_node_names)
2652

    
2653
  def Exec(self, feedback_fn):
2654
    """Verify integrity of the node group, performing various test on nodes.
2655

2656
    """
2657
    # This method has too many local variables. pylint: disable=R0914
2658
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2659

    
2660
    if not self.my_node_names:
2661
      # empty node group
2662
      feedback_fn("* Empty node group, skipping verification")
2663
      return True
2664

    
2665
    self.bad = False
2666
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2667
    verbose = self.op.verbose
2668
    self._feedback_fn = feedback_fn
2669

    
2670
    vg_name = self.cfg.GetVGName()
2671
    drbd_helper = self.cfg.GetDRBDHelper()
2672
    cluster = self.cfg.GetClusterInfo()
2673
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2674
    hypervisors = cluster.enabled_hypervisors
2675
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2676

    
2677
    i_non_redundant = [] # Non redundant instances
2678
    i_non_a_balanced = [] # Non auto-balanced instances
2679
    n_offline = 0 # Count of offline nodes
2680
    n_drained = 0 # Count of nodes being drained
2681
    node_vol_should = {}
2682

    
2683
    # FIXME: verify OS list
2684

    
2685
    # File verification
2686
    filemap = _ComputeAncillaryFiles(cluster, False)
2687

    
2688
    # do local checksums
2689
    master_node = self.master_node = self.cfg.GetMasterNode()
2690
    master_ip = self.cfg.GetMasterIP()
2691

    
2692
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2693

    
2694
    node_verify_param = {
2695
      constants.NV_FILELIST:
2696
        utils.UniqueSequence(filename
2697
                             for files in filemap
2698
                             for filename in files),
2699
      constants.NV_NODELIST:
2700
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2701
                                  self.all_node_info.values()),
2702
      constants.NV_HYPERVISOR: hypervisors,
2703
      constants.NV_HVPARAMS:
2704
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2705
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2706
                                 for node in node_data_list
2707
                                 if not node.offline],
2708
      constants.NV_INSTANCELIST: hypervisors,
2709
      constants.NV_VERSION: None,
2710
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2711
      constants.NV_NODESETUP: None,
2712
      constants.NV_TIME: None,
2713
      constants.NV_MASTERIP: (master_node, master_ip),
2714
      constants.NV_OSLIST: None,
2715
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2716
      }
2717

    
2718
    if vg_name is not None:
2719
      node_verify_param[constants.NV_VGLIST] = None
2720
      node_verify_param[constants.NV_LVLIST] = vg_name
2721
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2722
      node_verify_param[constants.NV_DRBDLIST] = None
2723

    
2724
    if drbd_helper:
2725
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2726

    
2727
    # bridge checks
2728
    # FIXME: this needs to be changed per node-group, not cluster-wide
2729
    bridges = set()
2730
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2731
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2732
      bridges.add(default_nicpp[constants.NIC_LINK])
2733
    for instance in self.my_inst_info.values():
2734
      for nic in instance.nics:
2735
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2736
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2737
          bridges.add(full_nic[constants.NIC_LINK])
2738

    
2739
    if bridges:
2740
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2741

    
2742
    # Build our expected cluster state
2743
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2744
                                                 name=node.name,
2745
                                                 vm_capable=node.vm_capable))
2746
                      for node in node_data_list)
2747

    
2748
    # Gather OOB paths
2749
    oob_paths = []
2750
    for node in self.all_node_info.values():
2751
      path = _SupportsOob(self.cfg, node)
2752
      if path and path not in oob_paths:
2753
        oob_paths.append(path)
2754

    
2755
    if oob_paths:
2756
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2757

    
2758
    for instance in self.my_inst_names:
2759
      inst_config = self.my_inst_info[instance]
2760

    
2761
      for nname in inst_config.all_nodes:
2762
        if nname not in node_image:
2763
          gnode = self.NodeImage(name=nname)
2764
          gnode.ghost = (nname not in self.all_node_info)
2765
          node_image[nname] = gnode
2766

    
2767
      inst_config.MapLVsByNode(node_vol_should)
2768

    
2769
      pnode = inst_config.primary_node
2770
      node_image[pnode].pinst.append(instance)
2771

    
2772
      for snode in inst_config.secondary_nodes:
2773
        nimg = node_image[snode]
2774
        nimg.sinst.append(instance)
2775
        if pnode not in nimg.sbp:
2776
          nimg.sbp[pnode] = []
2777
        nimg.sbp[pnode].append(instance)
2778

    
2779
    # At this point, we have the in-memory data structures complete,
2780
    # except for the runtime information, which we'll gather next
2781

    
2782
    # Due to the way our RPC system works, exact response times cannot be
2783
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2784
    # time before and after executing the request, we can at least have a time
2785
    # window.
2786
    nvinfo_starttime = time.time()
2787
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2788
                                           node_verify_param,
2789
                                           self.cfg.GetClusterName())
2790
    nvinfo_endtime = time.time()
2791

    
2792
    if self.extra_lv_nodes and vg_name is not None:
2793
      extra_lv_nvinfo = \
2794
          self.rpc.call_node_verify(self.extra_lv_nodes,
2795
                                    {constants.NV_LVLIST: vg_name},
2796
                                    self.cfg.GetClusterName())
2797
    else:
2798
      extra_lv_nvinfo = {}
2799

    
2800
    all_drbd_map = self.cfg.ComputeDRBDMap()
2801

    
2802
    feedback_fn("* Gathering disk information (%s nodes)" %
2803
                len(self.my_node_names))
2804
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2805
                                     self.my_inst_info)
2806

    
2807
    feedback_fn("* Verifying configuration file consistency")
2808

    
2809
    # If not all nodes are being checked, we need to make sure the master node
2810
    # and a non-checked vm_capable node are in the list.
2811
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2812
    if absent_nodes:
2813
      vf_nvinfo = all_nvinfo.copy()
2814
      vf_node_info = list(self.my_node_info.values())
2815
      additional_nodes = []
2816
      if master_node not in self.my_node_info:
2817
        additional_nodes.append(master_node)
2818
        vf_node_info.append(self.all_node_info[master_node])
2819
      # Add the first vm_capable node we find which is not included
2820
      for node in absent_nodes:
2821
        nodeinfo = self.all_node_info[node]
2822
        if nodeinfo.vm_capable and not nodeinfo.offline:
2823
          additional_nodes.append(node)
2824
          vf_node_info.append(self.all_node_info[node])
2825
          break
2826
      key = constants.NV_FILELIST
2827
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2828
                                                 {key: node_verify_param[key]},
2829
                                                 self.cfg.GetClusterName()))
2830
    else:
2831
      vf_nvinfo = all_nvinfo
2832
      vf_node_info = self.my_node_info.values()
2833

    
2834
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2835

    
2836
    feedback_fn("* Verifying node status")
2837

    
2838
    refos_img = None
2839

    
2840
    for node_i in node_data_list:
2841
      node = node_i.name
2842
      nimg = node_image[node]
2843

    
2844
      if node_i.offline:
2845
        if verbose:
2846
          feedback_fn("* Skipping offline node %s" % (node,))
2847
        n_offline += 1
2848
        continue
2849

    
2850
      if node == master_node:
2851
        ntype = "master"
2852
      elif node_i.master_candidate:
2853
        ntype = "master candidate"
2854
      elif node_i.drained:
2855
        ntype = "drained"
2856
        n_drained += 1
2857
      else:
2858
        ntype = "regular"
2859
      if verbose:
2860
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2861

    
2862
      msg = all_nvinfo[node].fail_msg
2863
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2864
      if msg:
2865
        nimg.rpc_fail = True
2866
        continue
2867

    
2868
      nresult = all_nvinfo[node].payload
2869

    
2870
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2871
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2872
      self._VerifyNodeNetwork(node_i, nresult)
2873
      self._VerifyOob(node_i, nresult)
2874

    
2875
      if nimg.vm_capable:
2876
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2877
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2878
                             all_drbd_map)
2879

    
2880
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2881
        self._UpdateNodeInstances(node_i, nresult, nimg)
2882
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2883
        self._UpdateNodeOS(node_i, nresult, nimg)
2884

    
2885
        if not nimg.os_fail:
2886
          if refos_img is None:
2887
            refos_img = nimg
2888
          self._VerifyNodeOS(node_i, nimg, refos_img)
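        # (the first vm_capable node whose OS information could be gathered
        #  becomes the reference image; every later node is compared to it)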
2889
        self._VerifyNodeBridges(node_i, nresult, bridges)
2890

    
2891
        # Check whether all running instancies are primary for the node. (This
2892
        # can no longer be done from _VerifyInstance below, since some of the
2893
        # wrong instances could be from other node groups.)
2894
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2895

    
2896
        for inst in non_primary_inst:
2897
          test = inst in self.all_inst_info
2898
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2899
                   "instance should not run on node %s", node_i.name)
2900
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2901
                   "node is running unknown instance %s", inst)
2902

    
2903
    for node, result in extra_lv_nvinfo.items():
2904
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2905
                              node_image[node], vg_name)
2906

    
2907
    feedback_fn("* Verifying instance status")
2908
    for instance in self.my_inst_names:
2909
      if verbose:
2910
        feedback_fn("* Verifying instance %s" % instance)
2911
      inst_config = self.my_inst_info[instance]
2912
      self._VerifyInstance(instance, inst_config, node_image,
2913
                           instdisk[instance])
2914
      inst_nodes_offline = []
2915

    
2916
      pnode = inst_config.primary_node
2917
      pnode_img = node_image[pnode]
2918
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2919
               self.ENODERPC, pnode, "instance %s, connection to"
2920
               " primary node failed", instance)
2921

    
2922
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2923
               self.EINSTANCEBADNODE, instance,
2924
               "instance is marked as running and lives on offline node %s",
2925
               inst_config.primary_node)
2926

    
2927
      # If the instance is non-redundant we cannot survive losing its primary
2928
      # node, so we are not N+1 compliant. On the other hand we have no disk
2929
      # templates with more than one secondary so that situation is not well
2930
      # supported either.
2931
      # FIXME: does not support file-backed instances
2932
      if not inst_config.secondary_nodes:
2933
        i_non_redundant.append(instance)
2934

    
2935
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2936
               instance, "instance has multiple secondary nodes: %s",
2937
               utils.CommaJoin(inst_config.secondary_nodes),
2938
               code=self.ETYPE_WARNING)
2939

    
2940
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2941
        pnode = inst_config.primary_node
2942
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2943
        instance_groups = {}
2944

    
2945
        for node in instance_nodes:
2946
          instance_groups.setdefault(self.all_node_info[node].group,
2947
                                     []).append(node)
2948

    
2949
        pretty_list = [
2950
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2951
          # Sort so that we always list the primary node first.
2952
          for group, nodes in sorted(instance_groups.items(),
2953
                                     key=lambda (_, nodes): pnode in nodes,
2954
                                     reverse=True)]
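        # (the sort key is the boolean "pnode in nodes", so with reverse=True
        #  the group containing the primary node is always listed first)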
2955

    
2956
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2957
                      instance, "instance has primary and secondary nodes in"
2958
                      " different groups: %s", utils.CommaJoin(pretty_list),
2959
                      code=self.ETYPE_WARNING)
2960

    
2961
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2962
        i_non_a_balanced.append(instance)
2963

    
2964
      for snode in inst_config.secondary_nodes:
2965
        s_img = node_image[snode]
2966
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2967
                 "instance %s, connection to secondary node failed", instance)
2968

    
2969
        if s_img.offline:
2970
          inst_nodes_offline.append(snode)
2971

    
2972
      # warn that the instance lives on offline nodes
2973
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2974
               "instance has offline secondary node(s) %s",
2975
               utils.CommaJoin(inst_nodes_offline))
2976
      # ... or ghost/non-vm_capable nodes
2977
      for node in inst_config.all_nodes:
2978
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2979
                 "instance lives on ghost node %s", node)
2980
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2981
                 instance, "instance lives on non-vm_capable node %s", node)
2982

    
2983
    feedback_fn("* Verifying orphan volumes")
2984
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2985

    
2986
    # We will get spurious "unknown volume" warnings if any node of this group
2987
    # is secondary for an instance whose primary is in another group. To avoid
2988
    # them, we find these instances and add their volumes to node_vol_should.
2989
    for inst in self.all_inst_info.values():
2990
      for secondary in inst.secondary_nodes:
2991
        if (secondary in self.my_node_info
2992
            and inst.name not in self.my_inst_info):
2993
          inst.MapLVsByNode(node_vol_should)
2994
          break
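    # (node_vol_should maps node names to the LV names expected on that node,
    #  roughly {"node1.example.com": ["xenvg/<lv name>", ...]}; the node and
    #  VG names here are purely illustrative)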
2995

    
2996
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2997

    
2998
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2999
      feedback_fn("* Verifying N+1 Memory redundancy")
3000
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3001

    
3002
    feedback_fn("* Other Notes")
3003
    if i_non_redundant:
3004
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3005
                  % len(i_non_redundant))
3006

    
3007
    if i_non_a_balanced:
3008
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3009
                  % len(i_non_a_balanced))
3010

    
3011
    if n_offline:
3012
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3013

    
3014
    if n_drained:
3015
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3016

    
3017
    return not self.bad
3018

    
3019
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3020
    """Analyze the post-hooks' result
3021

3022
    This method analyses the hook result, handles it, and sends some
3023
    nicely-formatted feedback back to the user.
3024

3025
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3026
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3027
    @param hooks_results: the results of the multi-node hooks rpc call
3028
    @param feedback_fn: function used to send feedback back to the caller
3029
    @param lu_result: previous Exec result
3030
    @return: the new Exec result, based on the previous result
3031
        and hook results
3032

3033
    """
3034
    # We only really run POST phase hooks, only for non-empty groups,
3035
    # and are only interested in their results
3036
    if not self.my_node_names:
3037
      # empty node group
3038
      pass
3039
    elif phase == constants.HOOKS_PHASE_POST:
3040
      # Used to change hooks' output to proper indentation
3041
      feedback_fn("* Hooks Results")
3042
      assert hooks_results, "invalid result from hooks"
3043

    
3044
      for node_name in hooks_results:
3045
        res = hooks_results[node_name]
3046
        msg = res.fail_msg
3047
        test = msg and not res.offline
3048
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
3049
                      "Communication failure in hooks execution: %s", msg)
3050
        if res.offline or msg:
3051
          # No need to investigate payload if node is offline or gave
3052
          # an error.
3053
          continue
3054
        for script, hkr, output in res.payload:
3055
          test = hkr == constants.HKR_FAIL
3056
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
3057
                        "Script %s failed, output:", script)
3058
          if test:
3059
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3060
            feedback_fn("%s" % output)
3061
            lu_result = False
3062

    
3063
    return lu_result
3064

    
3065

    
3066
class LUClusterVerifyDisks(NoHooksLU):
3067
  """Verifies the cluster disks status.
3068

3069
  """
3070
  REQ_BGL = False
3071

    
3072
  def ExpandNames(self):
3073
    self.share_locks = _ShareAll()
3074
    self.needed_locks = {
3075
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3076
      }
3077

    
3078
  def Exec(self, feedback_fn):
3079
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3080

    
3081
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3082
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3083
                           for group in group_names])
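    # (Illustration: on a cluster with two hypothetical groups "default" and
    #  "other" this returns the jobs
    #  [[OpGroupVerifyDisks(group_name="default")],
    #   [OpGroupVerifyDisks(group_name="other")]],
    #  i.e. one single-opcode job per node group.)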
3084

    
3085

    
3086
class LUGroupVerifyDisks(NoHooksLU):
3087
  """Verifies the status of all disks in a node group.
3088

3089
  """
3090
  REQ_BGL = False
3091

    
3092
  def ExpandNames(self):
3093
    # Raises errors.OpPrereqError on its own if group can't be found
3094
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3095

    
3096
    self.share_locks = _ShareAll()
3097
    self.needed_locks = {
3098
      locking.LEVEL_INSTANCE: [],
3099
      locking.LEVEL_NODEGROUP: [],
3100
      locking.LEVEL_NODE: [],
3101
      }
3102

    
3103
  def DeclareLocks(self, level):
3104
    if level == locking.LEVEL_INSTANCE:
3105
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3106

    
3107
      # Lock instances optimistically, needs verification once node and group
3108
      # locks have been acquired
3109
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3110
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3111

    
3112
    elif level == locking.LEVEL_NODEGROUP:
3113
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3114

    
3115
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3116
        set([self.group_uuid] +
3117
            # Lock all groups used by instances optimistically; this requires
3118
            # going via the node before it's locked, requiring verification
3119
            # later on
3120
            [group_uuid
3121
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3122
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3123

    
3124
    elif level == locking.LEVEL_NODE:
3125
      # This will only lock the nodes in the group to be verified which contain
3126
      # actual instances
3127
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3128
      self._LockInstancesNodes()
3129

    
3130
      # Lock all nodes in group to be verified
3131
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3132
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3133
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3134

    
3135
  def CheckPrereq(self):
3136
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3137
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3138
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3139

    
3140
    assert self.group_uuid in owned_groups
3141

    
3142
    # Check if locked instances are still correct
3143
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3144

    
3145
    # Get instance information
3146
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3147

    
3148
    # Check if node groups for locked instances are still correct
3149
    _CheckInstancesNodeGroups(self.cfg, self.instances,
3150
                              owned_groups, owned_nodes, self.group_uuid)
3151

    
3152
  def Exec(self, feedback_fn):
3153
    """Verify integrity of cluster disks.
3154

3155
    @rtype: tuple of three items
3156
    @return: a tuple of (dict of node-to-node_error, list of instances
3157
        which need activate-disks, dict of instance: (node, volume) for
3158
        missing volumes)
3159

3160
    """
3161
    res_nodes = {}
3162
    res_instances = set()
3163
    res_missing = {}
3164

    
3165
    nv_dict = _MapInstanceDisksToNodes([inst
3166
                                        for inst in self.instances.values()
3167
                                        if inst.admin_up])
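    # nv_dict maps (node name, LV name) pairs to the owning instance object;
    # entries are popped below as the nodes report the corresponding LVs back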
3168

    
3169
    if nv_dict:
3170
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3171
                             set(self.cfg.GetVmCapableNodeList()))
3172

    
3173
      node_lvs = self.rpc.call_lv_list(nodes, [])
3174

    
3175
      for (node, node_res) in node_lvs.items():
3176
        if node_res.offline:
3177
          continue
3178

    
3179
        msg = node_res.fail_msg
3180
        if msg:
3181
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3182
          res_nodes[node] = msg
3183
          continue
3184

    
3185
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3186
          inst = nv_dict.pop((node, lv_name), None)
3187
          if not (lv_online or inst is None):
3188
            res_instances.add(inst)
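            # (the LV belongs to a known instance but is not online, so that
            #  instance needs its disks re-activated)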
3189

    
3190
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3191
      # better
3192
      for key, inst in nv_dict.iteritems():
3193
        res_missing.setdefault(inst, []).append(list(key))
3194

    
3195
    return (res_nodes, list(res_instances), res_missing)
3196

    
3197

    
3198
class LUClusterRepairDiskSizes(NoHooksLU):
3199
  """Verifies the cluster disks sizes.
3200

3201
  """
3202
  REQ_BGL = False
3203

    
3204
  def ExpandNames(self):
3205
    if self.op.instances:
3206
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3207
      self.needed_locks = {
3208
        locking.LEVEL_NODE: [],
3209
        locking.LEVEL_INSTANCE: self.wanted_names,
3210
        }
3211
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3212
    else:
3213
      self.wanted_names = None
3214
      self.needed_locks = {
3215
        locking.LEVEL_NODE: locking.ALL_SET,
3216
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3217
        }
3218
    self.share_locks = {
3219
      locking.LEVEL_NODE: 1,
3220
      locking.LEVEL_INSTANCE: 0,
3221
      }
3222

    
3223
  def DeclareLocks(self, level):
3224
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3225
      self._LockInstancesNodes(primary_only=True)
3226

    
3227
  def CheckPrereq(self):
3228
    """Check prerequisites.
3229

3230
    This only checks the optional instance list against the existing names.
3231

3232
    """
3233
    if self.wanted_names is None:
3234
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3235

    
3236
    self.wanted_instances = \
3237
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3238

    
3239
  def _EnsureChildSizes(self, disk):
3240
    """Ensure children of the disk have the needed disk size.
3241

3242
    This is valid mainly for DRBD8 and fixes an issue where the
3243
    children have smaller disk size.
3244

3245
    @param disk: an L{ganeti.objects.Disk} object
3246

3247
    """
3248
    if disk.dev_type == constants.LD_DRBD8:
3249
      assert disk.children, "Empty children for DRBD8?"
3250
      fchild = disk.children[0]
3251
      mismatch = fchild.size < disk.size
3252
      if mismatch:
3253
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3254
                     fchild.size, disk.size)
3255
        fchild.size = disk.size
3256

    
3257
      # and we recurse on this child only, not on the metadev
3258
      return self._EnsureChildSizes(fchild) or mismatch
3259
    else:
3260
      return False
3261

    
3262
  def Exec(self, feedback_fn):
3263
    """Verify the size of cluster disks.
3264

3265
    """
3266
    # TODO: check child disks too
3267
    # TODO: check differences in size between primary/secondary nodes
3268
    per_node_disks = {}
3269
    for instance in self.wanted_instances:
3270
      pnode = instance.primary_node
3271
      if pnode not in per_node_disks:
3272
        per_node_disks[pnode] = []
3273
      for idx, disk in enumerate(instance.disks):
3274
        per_node_disks[pnode].append((instance, idx, disk))
3275

    
3276
    changed = []
3277
    for node, dskl in per_node_disks.items():
3278
      newl = [v[2].Copy() for v in dskl]
3279
      for dsk in newl:
3280
        self.cfg.SetDiskID(dsk, node)
3281
      result = self.rpc.call_blockdev_getsize(node, newl)
3282
      if result.fail_msg:
3283
        self.LogWarning("Failure in blockdev_getsize call to node"
3284
                        " %s, ignoring", node)
3285
        continue
3286
      if len(result.payload) != len(dskl):
3287
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3288
                        " result.payload=%s", node, len(dskl), result.payload)
3289
        self.LogWarning("Invalid result from node %s, ignoring node results",
3290
                        node)
3291
        continue
3292
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3293
        if size is None:
3294
          self.LogWarning("Disk %d of instance %s did not return size"
3295
                          " information, ignoring", idx, instance.name)
3296
          continue
3297
        if not isinstance(size, (int, long)):
3298
          self.LogWarning("Disk %d of instance %s did not return valid"
3299
                          " size information, ignoring", idx, instance.name)
3300
          continue
3301
        size = size >> 20
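        # (the node reports the size in bytes while disk.size is kept in MiB,
        #  hence the shift by 20 bits)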
3302
        if size != disk.size:
3303
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3304
                       " correcting: recorded %d, actual %d", idx,
3305
                       instance.name, disk.size, size)
3306
          disk.size = size
3307
          self.cfg.Update(instance, feedback_fn)
3308
          changed.append((instance.name, idx, size))
3309
        if self._EnsureChildSizes(disk):
3310
          self.cfg.Update(instance, feedback_fn)
3311
          changed.append((instance.name, idx, disk.size))
3312
    return changed
3313

    
3314

    
3315
class LUClusterRename(LogicalUnit):
3316
  """Rename the cluster.
3317

3318
  """
3319
  HPATH = "cluster-rename"
3320
  HTYPE = constants.HTYPE_CLUSTER
3321

    
3322
  def BuildHooksEnv(self):
3323
    """Build hooks env.
3324

3325
    """
3326
    return {
3327
      "OP_TARGET": self.cfg.GetClusterName(),
3328
      "NEW_NAME": self.op.name,
3329
      }
3330

    
3331
  def BuildHooksNodes(self):
3332
    """Build hooks nodes.
3333

3334
    """
3335
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3336

    
3337
  def CheckPrereq(self):
3338
    """Verify that the passed name is a valid one.
3339

3340
    """
3341
    hostname = netutils.GetHostname(name=self.op.name,
3342
                                    family=self.cfg.GetPrimaryIPFamily())
3343

    
3344
    new_name = hostname.name
3345
    self.ip = new_ip = hostname.ip
3346
    old_name = self.cfg.GetClusterName()
3347
    old_ip = self.cfg.GetMasterIP()
3348
    if new_name == old_name and new_ip == old_ip:
3349
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3350
                                 " cluster has changed",
3351
                                 errors.ECODE_INVAL)
3352
    if new_ip != old_ip:
3353
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3354
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3355
                                   " reachable on the network" %
3356
                                   new_ip, errors.ECODE_NOTUNIQUE)
3357

    
3358
    self.op.name = new_name
3359

    
3360
  def Exec(self, feedback_fn):
3361
    """Rename the cluster.
3362

3363
    """
3364
    clustername = self.op.name
3365
    ip = self.ip
3366

    
3367
    # shutdown the master IP
3368
    master = self.cfg.GetMasterNode()
3369
    result = self.rpc.call_node_deactivate_master_ip(master)
3370
    result.Raise("Could not disable the master role")
3371

    
3372
    try:
3373
      cluster = self.cfg.GetClusterInfo()
3374
      cluster.cluster_name = clustername
3375
      cluster.master_ip = ip
3376
      self.cfg.Update(cluster, feedback_fn)
3377

    
3378
      # update the known hosts file
3379
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3380
      node_list = self.cfg.GetOnlineNodeList()
3381
      try:
3382
        node_list.remove(master)
3383
      except ValueError:
3384
        pass
3385
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3386
    finally:
3387
      result = self.rpc.call_node_activate_master_ip(master)
3388
      msg = result.fail_msg
3389
      if msg:
3390
        self.LogWarning("Could not re-enable the master role on"
3391
                        " the master, please restart manually: %s", msg)
3392

    
3393
    return clustername
3394

    
3395

    
3396
class LUClusterSetParams(LogicalUnit):
3397
  """Change the parameters of the cluster.
3398

3399
  """
3400
  HPATH = "cluster-modify"
3401
  HTYPE = constants.HTYPE_CLUSTER
3402
  REQ_BGL = False
3403

    
3404
  def CheckArguments(self):
3405
    """Check parameters
3406

3407
    """
3408
    if self.op.uid_pool:
3409
      uidpool.CheckUidPool(self.op.uid_pool)
3410

    
3411
    if self.op.add_uids:
3412
      uidpool.CheckUidPool(self.op.add_uids)
3413

    
3414
    if self.op.remove_uids:
3415
      uidpool.CheckUidPool(self.op.remove_uids)
3416

    
3417
  def ExpandNames(self):
3418
    # FIXME: in the future maybe other cluster params won't require checking on
3419
    # all nodes to be modified.
3420
    self.needed_locks = {
3421
      locking.LEVEL_NODE: locking.ALL_SET,
3422
    }
3423
    self.share_locks[locking.LEVEL_NODE] = 1
3424

    
3425
  def BuildHooksEnv(self):
3426
    """Build hooks env.
3427

3428
    """
3429
    return {
3430
      "OP_TARGET": self.cfg.GetClusterName(),
3431
      "NEW_VG_NAME": self.op.vg_name,
3432
      }
3433

    
3434
  def BuildHooksNodes(self):
3435
    """Build hooks nodes.
3436

3437
    """
3438
    mn = self.cfg.GetMasterNode()
3439
    return ([mn], [mn])
3440

    
3441
  def CheckPrereq(self):
3442
    """Check prerequisites.
3443

3444
    This checks whether the given params don't conflict and
3445
    if the given volume group is valid.
3446

3447
    """
3448
    if self.op.vg_name is not None and not self.op.vg_name:
3449
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3450
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3451
                                   " instances exist", errors.ECODE_INVAL)
3452

    
3453
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3454
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3455
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3456
                                   " drbd-based instances exist",
3457
                                   errors.ECODE_INVAL)
3458

    
3459
    node_list = self.owned_locks(locking.LEVEL_NODE)
3460

    
3461
    # if vg_name is not None, check the given volume group on all nodes
3462
    if self.op.vg_name:
3463
      vglist = self.rpc.call_vg_list(node_list)
3464
      for node in node_list:
3465
        msg = vglist[node].fail_msg
3466
        if msg:
3467
          # ignoring down node
3468
          self.LogWarning("Error while gathering data on node %s"
3469
                          " (ignoring node): %s", node, msg)
3470
          continue
3471
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3472
                                              self.op.vg_name,
3473
                                              constants.MIN_VG_SIZE)
3474
        if vgstatus:
3475
          raise errors.OpPrereqError("Error on node '%s': %s" %
3476
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3477

    
3478
    if self.op.drbd_helper:
3479
      # checks given drbd helper on all nodes
3480
      helpers = self.rpc.call_drbd_helper(node_list)
3481
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3482
        if ninfo.offline:
3483
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3484
          continue
3485
        msg = helpers[node].fail_msg
3486
        if msg:
3487
          raise errors.OpPrereqError("Error checking drbd helper on node"
3488
                                     " '%s': %s" % (node, msg),
3489
                                     errors.ECODE_ENVIRON)
3490
        node_helper = helpers[node].payload
3491
        if node_helper != self.op.drbd_helper:
3492
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3493
                                     (node, node_helper), errors.ECODE_ENVIRON)
3494

    
3495
    self.cluster = cluster = self.cfg.GetClusterInfo()
3496
    # validate params changes
3497
    if self.op.beparams:
3498
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3499
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3500

    
3501
    if self.op.ndparams:
3502
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3503
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3504

    
3505
      # TODO: we need a more general way to handle resetting
3506
      # cluster-level parameters to default values
3507
      if self.new_ndparams["oob_program"] == "":
3508
        self.new_ndparams["oob_program"] = \
3509
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3510

    
3511
    if self.op.nicparams:
3512
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3513
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3514
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3515
      nic_errors = []
3516

    
3517
      # check all instances for consistency
3518
      for instance in self.cfg.GetAllInstancesInfo().values():
3519
        for nic_idx, nic in enumerate(instance.nics):
3520
          params_copy = copy.deepcopy(nic.nicparams)
3521
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3522

    
3523
          # check parameter syntax
3524
          try:
3525
            objects.NIC.CheckParameterSyntax(params_filled)
3526
          except errors.ConfigurationError, err:
3527
            nic_errors.append("Instance %s, nic/%d: %s" %
3528
                              (instance.name, nic_idx, err))
3529

    
3530
          # if we're moving instances to routed, check that they have an ip
3531
          target_mode = params_filled[constants.NIC_MODE]
3532
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3533
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3534
                              " address" % (instance.name, nic_idx))
3535
      if nic_errors:
3536
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3537
                                   "\n".join(nic_errors))
3538

    
3539
    # hypervisor list/parameters
3540
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3541
    if self.op.hvparams:
3542
      for hv_name, hv_dict in self.op.hvparams.items():
3543
        if hv_name not in self.new_hvparams:
3544
          self.new_hvparams[hv_name] = hv_dict
3545
        else:
3546
          self.new_hvparams[hv_name].update(hv_dict)
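    # (Illustration: parameters passed in the opcode are merged per hypervisor
    #  on top of the existing cluster-level ones, so e.g. a hypothetical
    #  {"xen-pvm": {"root_path": "/dev/xvda1"}} only overrides that single key
    #  and leaves the other "xen-pvm" parameters untouched.)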
3547

    
3548
    # os hypervisor parameters
3549
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3550
    if self.op.os_hvp:
3551
      for os_name, hvs in self.op.os_hvp.items():
3552
        if os_name not in self.new_os_hvp:
3553
          self.new_os_hvp[os_name] = hvs
3554
        else:
3555
          for hv_name, hv_dict in hvs.items():
3556
            if hv_name not in self.new_os_hvp[os_name]:
3557
              self.new_os_hvp[os_name][hv_name] = hv_dict
3558
            else:
3559
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3560

    
3561
    # os parameters
3562
    self.new_osp = objects.FillDict(cluster.osparams, {})
3563
    if self.op.osparams:
3564
      for os_name, osp in self.op.osparams.items():
3565
        if os_name not in self.new_osp:
3566
          self.new_osp[os_name] = {}
3567

    
3568
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3569
                                                  use_none=True)
3570

    
3571
        if not self.new_osp[os_name]:
3572
          # we removed all parameters
3573
          del self.new_osp[os_name]
3574
        else:
3575
          # check the parameter validity (remote check)
3576
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3577
                         os_name, self.new_osp[os_name])
3578

    
3579
    # changes to the hypervisor list
3580
    if self.op.enabled_hypervisors is not None:
3581
      self.hv_list = self.op.enabled_hypervisors
3582
      for hv in self.hv_list:
3583
        # if the hypervisor doesn't already exist in the cluster
3584
        # hvparams, we initialize it to empty, and then (in both
3585
        # cases) we make sure to fill the defaults, as we might not
3586
        # have a complete defaults list if the hypervisor wasn't
3587
        # enabled before
3588
        if hv not in new_hvp:
3589
          new_hvp[hv] = {}
3590
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3591
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3592
    else:
3593
      self.hv_list = cluster.enabled_hypervisors
3594

    
3595
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3596
      # either the enabled list has changed, or the parameters have, validate
3597
      for hv_name, hv_params in self.new_hvparams.items():
3598
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3599
            (self.op.enabled_hypervisors and
3600
             hv_name in self.op.enabled_hypervisors)):
3601
          # either this is a new hypervisor, or its parameters have changed
3602
          hv_class = hypervisor.GetHypervisor(hv_name)
3603
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3604
          hv_class.CheckParameterSyntax(hv_params)
3605
          _CheckHVParams(self, node_list, hv_name, hv_params)
3606

    
3607
    if self.op.os_hvp:
3608
      # no need to check any newly-enabled hypervisors, since the
3609
      # defaults have already been checked in the above code-block
3610
      for os_name, os_hvp in self.new_os_hvp.items():
3611
        for hv_name, hv_params in os_hvp.items():
3612
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3613
          # we need to fill in the new os_hvp on top of the actual hv_p
3614
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3615
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3616
          hv_class = hypervisor.GetHypervisor(hv_name)
3617
          hv_class.CheckParameterSyntax(new_osp)
3618
          _CheckHVParams(self, node_list, hv_name, new_osp)
3619

    
3620
    if self.op.default_iallocator:
3621
      alloc_script = utils.FindFile(self.op.default_iallocator,
3622
                                    constants.IALLOCATOR_SEARCH_PATH,
3623
                                    os.path.isfile)
3624
      if alloc_script is None:
3625
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3626
                                   " specified" % self.op.default_iallocator,
3627
                                   errors.ECODE_INVAL)
3628

    
3629
  def Exec(self, feedback_fn):
3630
    """Change the parameters of the cluster.
3631

3632
    """
3633
    if self.op.vg_name is not None:
3634
      new_volume = self.op.vg_name
3635
      if not new_volume:
3636
        new_volume = None
3637
      if new_volume != self.cfg.GetVGName():
3638
        self.cfg.SetVGName(new_volume)
3639
      else:
3640
        feedback_fn("Cluster LVM configuration already in desired"
3641
                    " state, not changing")
3642
    if self.op.drbd_helper is not None:
3643
      new_helper = self.op.drbd_helper
3644
      if not new_helper:
3645
        new_helper = None
3646
      if new_helper != self.cfg.GetDRBDHelper():
3647
        self.cfg.SetDRBDHelper(new_helper)
3648
      else:
3649
        feedback_fn("Cluster DRBD helper already in desired state,"
3650
                    " not changing")
3651
    if self.op.hvparams:
3652
      self.cluster.hvparams = self.new_hvparams
3653
    if self.op.os_hvp:
3654
      self.cluster.os_hvp = self.new_os_hvp
3655
    if self.op.enabled_hypervisors is not None:
3656
      self.cluster.hvparams = self.new_hvparams
3657
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3658
    if self.op.beparams:
3659
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3660
    if self.op.nicparams:
3661
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3662
    if self.op.osparams:
3663
      self.cluster.osparams = self.new_osp
3664
    if self.op.ndparams:
3665
      self.cluster.ndparams = self.new_ndparams
3666

    
3667
    if self.op.candidate_pool_size is not None:
3668
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3669
      # we need to update the pool size here, otherwise the save will fail
3670
      _AdjustCandidatePool(self, [])
3671

    
3672
    if self.op.maintain_node_health is not None:
3673
      self.cluster.maintain_node_health = self.op.maintain_node_health
3674

    
3675
    if self.op.prealloc_wipe_disks is not None:
3676
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3677

    
3678
    if self.op.add_uids is not None:
3679
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3680

    
3681
    if self.op.remove_uids is not None:
3682
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3683

    
3684
    if self.op.uid_pool is not None:
3685
      self.cluster.uid_pool = self.op.uid_pool
3686

    
3687
    if self.op.default_iallocator is not None:
3688
      self.cluster.default_iallocator = self.op.default_iallocator
3689

    
3690
    if self.op.reserved_lvs is not None:
3691
      self.cluster.reserved_lvs = self.op.reserved_lvs
3692

    
3693
    def helper_os(aname, mods, desc):
3694
      desc += " OS list"
3695
      lst = getattr(self.cluster, aname)
3696
      for key, val in mods:
3697
        if key == constants.DDM_ADD:
3698
          if val in lst:
3699
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3700
          else:
3701
            lst.append(val)
3702
        elif key == constants.DDM_REMOVE:
3703
          if val in lst:
3704
            lst.remove(val)
3705
          else:
3706
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3707
        else:
3708
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
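    # (helper_os expects "mods" as a list of (action, OS name) pairs, e.g. the
    #  hypothetical [(constants.DDM_ADD, "my-os"),
    #  (constants.DDM_REMOVE, "old-os")]; any other action is a programmer
    #  error.)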
3709

    
3710
    if self.op.hidden_os:
3711
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3712

    
3713
    if self.op.blacklisted_os:
3714
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3715

    
3716
    if self.op.master_netdev:
3717
      master = self.cfg.GetMasterNode()
3718
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3719
                  self.cluster.master_netdev)
3720
      result = self.rpc.call_node_deactivate_master_ip(master)
3721
      result.Raise("Could not disable the master ip")
3722
      feedback_fn("Changing master_netdev from %s to %s" %
3723
                  (self.cluster.master_netdev, self.op.master_netdev))
3724
      self.cluster.master_netdev = self.op.master_netdev
3725

    
3726
    self.cfg.Update(self.cluster, feedback_fn)
3727

    
3728
    if self.op.master_netdev:
3729
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3730
                  self.op.master_netdev)
3731
      result = self.rpc.call_node_activate_master_ip(master)
3732
      if result.fail_msg:
3733
        self.LogWarning("Could not re-enable the master ip on"
3734
                        " the master, please restart manually: %s",
3735
                        result.fail_msg)
3736

    
3737

    
3738
def _UploadHelper(lu, nodes, fname):
3739
  """Helper for uploading a file and showing warnings.
3740

3741
  """
3742
  if os.path.exists(fname):
3743
    result = lu.rpc.call_upload_file(nodes, fname)
3744
    for to_node, to_result in result.items():
3745
      msg = to_result.fail_msg
3746
      if msg:
3747
        msg = ("Copy of file %s to node %s failed: %s" %
3748
               (fname, to_node, msg))
3749
        lu.proc.LogWarning(msg)
3750

    
3751

    
3752
def _ComputeAncillaryFiles(cluster, redist):
3753
  """Compute files external to Ganeti which need to be consistent.
3754

3755
  @type redist: boolean
3756
  @param redist: Whether to include files which need to be redistributed
3757

3758
  """
3759
  # Compute files for all nodes
3760
  files_all = set([
3761
    constants.SSH_KNOWN_HOSTS_FILE,
3762
    constants.CONFD_HMAC_KEY,
3763
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3764
    constants.RAPI_USERS_FILE,
3765
    ])
3766

    
3767
  if not redist:
3768
    files_all.update(constants.ALL_CERT_FILES)
3769
    files_all.update(ssconf.SimpleStore().GetFileList())
3770
  else:
3771
    # we need to ship at least the RAPI certificate
3772
    files_all.add(constants.RAPI_CERT_FILE)
3773

    
3774
  if cluster.modify_etc_hosts:
3775
    files_all.add(constants.ETC_HOSTS)
3776

    
3777
  # Files which are optional, these must:
3778
  # - be present in one other category as well
3779
  # - either exist or not exist on all nodes of that category (mc, vm all)
3780
  files_opt = set([
3781
    constants.RAPI_USERS_FILE,
3782
    ])
3783

    
3784
  # Files which should only be on master candidates
3785
  files_mc = set()
3786
  if not redist:
3787
    files_mc.add(constants.CLUSTER_CONF_FILE)
3788

    
3789
  # Files which should only be on VM-capable nodes
3790
  files_vm = set(filename
3791
    for hv_name in cluster.enabled_hypervisors
3792
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3793

    
3794
  files_opt |= set(filename
3795
    for hv_name in cluster.enabled_hypervisors
3796
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3797

    
3798
  # Filenames in each category must be unique
3799
  all_files_set = files_all | files_mc | files_vm
3800
  assert (len(all_files_set) ==
3801
          sum(map(len, [files_all, files_mc, files_vm]))), \
3802
         "Found file listed in more than one file list"
3803

    
3804
  # Optional files must be present in one other category
3805
  assert all_files_set.issuperset(files_opt), \
3806
         "Optional file not in a different required list"
3807

    
3808
  return (files_all, files_opt, files_mc, files_vm)
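  # (Callers typically unpack this as
  #  (files_all, files_opt, files_mc, files_vm) and check or redistribute each
  #  set against the matching node category, as _RedistributeAncillaryFiles
  #  below does for the redistribution case.)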
3809

    
3810

    
3811
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3812
  """Distribute additional files which are part of the cluster configuration.
3813

3814
  ConfigWriter takes care of distributing the config and ssconf files, but
3815
  there are more files which should be distributed to all nodes. This function
3816
  makes sure those are copied.
3817

3818
  @param lu: calling logical unit
3819
  @param additional_nodes: list of nodes not in the config to distribute to
3820
  @type additional_vm: boolean
3821
  @param additional_vm: whether the additional nodes are vm-capable or not
3822

3823
  """
3824
  # Gather target nodes
3825
  cluster = lu.cfg.GetClusterInfo()
3826
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3827

    
3828
  online_nodes = lu.cfg.GetOnlineNodeList()
3829
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3830

    
3831
  if additional_nodes is not None:
3832
    online_nodes.extend(additional_nodes)
3833
    if additional_vm:
3834
      vm_nodes.extend(additional_nodes)
3835

    
3836
  # Never distribute to master node
3837
  for nodelist in [online_nodes, vm_nodes]:
3838
    if master_info.name in nodelist:
3839
      nodelist.remove(master_info.name)
3840

    
3841
  # Gather file lists
3842
  (files_all, _, files_mc, files_vm) = \
3843
    _ComputeAncillaryFiles(cluster, True)
3844

    
3845
  # Never re-distribute configuration file from here
3846
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3847
              constants.CLUSTER_CONF_FILE in files_vm)
3848
  assert not files_mc, "Master candidates not handled in this function"
3849

    
3850
  filemap = [
3851
    (online_nodes, files_all),
3852
    (vm_nodes, files_vm),
3853
    ]
3854

    
3855
  # Upload the files
3856
  for (node_list, files) in filemap:
3857
    for fname in files:
3858
      _UploadHelper(lu, node_list, fname)
3859

    
3860

    
3861
class LUClusterRedistConf(NoHooksLU):
3862
  """Force the redistribution of cluster configuration.
3863

3864
  This is a very simple LU.
3865

3866
  """
3867
  REQ_BGL = False
3868

    
3869
  def ExpandNames(self):
3870
    self.needed_locks = {
3871
      locking.LEVEL_NODE: locking.ALL_SET,
3872
    }
3873
    self.share_locks[locking.LEVEL_NODE] = 1
3874

    
3875
  def Exec(self, feedback_fn):
3876
    """Redistribute the configuration.
3877

3878
    """
3879
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3880
    _RedistributeAncillaryFiles(self)
3881

    
3882

    
3883
class LUClusterActivateMasterIp(NoHooksLU):
3884
  """Activate the master IP on the master node.
3885

3886
  """
3887
  def Exec(self, feedback_fn):
3888
    """Activate the master IP.
3889

3890
    """
3891
    master = self.cfg.GetMasterNode()
3892
    result = self.rpc.call_node_activate_master_ip(master)
3893
    result.Raise("Could not activate the master IP")
3894

    
3895

    
3896
class LUClusterDeactivateMasterIp(NoHooksLU):
3897
  """Deactivate the master IP on the master node.
3898

3899
  """
3900
  def Exec(self, feedback_fn):
3901
    """Deactivate the master IP.
3902

3903
    """
3904
    master = self.cfg.GetMasterNode()
3905
    result = self.rpc.call_node_deactivate_master_ip(master)
3906
    result.Raise("Could not deactivate the master IP")
3907

    
3908

    
3909
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3910
  """Sleep and poll for an instance's disk to sync.
3911

3912
  """
3913
  if not instance.disks or disks is not None and not disks:
3914
    return True
3915

    
3916
  disks = _ExpandCheckDisks(instance, disks)
3917

    
3918
  if not oneshot:
3919
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3920

    
3921
  node = instance.primary_node
3922

    
3923
  for dev in disks:
3924
    lu.cfg.SetDiskID(dev, node)
3925

    
3926
  # TODO: Convert to utils.Retry
3927

    
3928
  retries = 0
3929
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3930
  while True:
3931
    max_time = 0
3932
    done = True
3933
    cumul_degraded = False
3934
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3935
    msg = rstats.fail_msg
3936
    if msg:
3937
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3938
      retries += 1
3939
      if retries >= 10:
3940
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3941
                                 " aborting." % node)
3942
      time.sleep(6)
3943
      continue
3944
    rstats = rstats.payload
3945
    retries = 0
3946
    for i, mstat in enumerate(rstats):
3947
      if mstat is None:
3948
        lu.LogWarning("Can't compute data for node %s/%s",
3949
                           node, disks[i].iv_name)
3950
        continue
3951

    
3952
      cumul_degraded = (cumul_degraded or
3953
                        (mstat.is_degraded and mstat.sync_percent is None))
3954
      if mstat.sync_percent is not None:
3955
        done = False
3956
        if mstat.estimated_time is not None:
3957
          rem_time = ("%s remaining (estimated)" %
3958
                      utils.FormatSeconds(mstat.estimated_time))
3959
          max_time = mstat.estimated_time
3960
        else:
3961
          rem_time = "no time estimate"
3962
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3963
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3964

    
3965
    # if we're done but degraded, let's do a few small retries, to
3966
    # make sure we see a stable and not transient situation; therefore
3967
    # we force restart of the loop
3968
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3969
      logging.info("Degraded disks found, %d retries left", degr_retries)
3970
      degr_retries -= 1
3971
      time.sleep(1)
3972
      continue
3973

    
3974
    if done or oneshot:
3975
      break
3976

    
3977
    time.sleep(min(60, max_time))
3978

    
3979
  if done:
3980
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3981
  return not cumul_degraded
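  # (The return value is True only if no disk ended up flagged as degraded
  #  without sync progress; callers generally treat a False result as a reason
  #  to distrust the instance's disks.)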
3982

    
3983

    
3984
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3985
  """Check that mirrors are not degraded.
3986

3987
  The ldisk parameter, if True, will change the test from the
3988
  is_degraded attribute (which represents overall non-ok status for
3989
  the device(s)) to the ldisk (representing the local storage status).
3990

3991
  """
3992
  lu.cfg.SetDiskID(dev, node)
3993

    
3994
  result = True
3995

    
3996
  if on_primary or dev.AssembleOnSecondary():
3997
    rstats = lu.rpc.call_blockdev_find(node, dev)
3998
    msg = rstats.fail_msg
3999
    if msg:
4000
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4001
      result = False
4002
    elif not rstats.payload:
4003
      lu.LogWarning("Can't find disk on node %s", node)
4004
      result = False
4005
    else:
4006
      if ldisk:
4007
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4008
      else:
4009
        result = result and not rstats.payload.is_degraded
4010

    
4011
  if dev.children:
4012
    for child in dev.children:
4013
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4014

    
4015
  return result
4016

    
4017

    
4018
class LUOobCommand(NoHooksLU):
4019
  """Logical unit for OOB handling.
4020

4021
  """
4022
  REQ_BGL = False
4023
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4024

    
4025
  def ExpandNames(self):
4026
    """Gather locks we need.
4027

4028
    """
4029
    if self.op.node_names:
4030
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4031
      lock_names = self.op.node_names
4032
    else:
4033
      lock_names = locking.ALL_SET
4034

    
4035
    self.needed_locks = {
4036
      locking.LEVEL_NODE: lock_names,
4037
      }
4038

    
4039
  def CheckPrereq(self):
4040
    """Check prerequisites.
4041

4042
    This checks:
4043
     - the node exists in the configuration
4044
     - OOB is supported
4045

4046
    Any errors are signaled by raising errors.OpPrereqError.
4047

4048
    """
4049
    self.nodes = []
4050
    self.master_node = self.cfg.GetMasterNode()
4051

    
4052
    assert self.op.power_delay >= 0.0
4053

    
4054
    if self.op.node_names:
4055
      if (self.op.command in self._SKIP_MASTER and
4056
          self.master_node in self.op.node_names):
4057
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4058
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4059

    
4060
        if master_oob_handler:
4061
          additional_text = ("run '%s %s %s' if you want to operate on the"
4062
                             " master regardless") % (master_oob_handler,
4063
                                                      self.op.command,
4064
                                                      self.master_node)
4065
        else:
4066
          additional_text = "it does not support out-of-band operations"
4067

    
4068
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4069
                                    " allowed for %s; %s") %
4070
                                   (self.master_node, self.op.command,
4071
                                    additional_text), errors.ECODE_INVAL)
4072
    else:
4073
      self.op.node_names = self.cfg.GetNodeList()
4074
      if self.op.command in self._SKIP_MASTER:
4075
        self.op.node_names.remove(self.master_node)
4076

    
4077
    if self.op.command in self._SKIP_MASTER:
4078
      assert self.master_node not in self.op.node_names
4079

    
4080
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4081
      if node is None:
4082
        raise errors.OpPrereqError("Node %s not found" % node_name,
4083
                                   errors.ECODE_NOENT)
4084
      else:
4085
        self.nodes.append(node)
4086

    
4087
      if (not self.op.ignore_status and
4088
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4089
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4090
                                    " not marked offline") % node_name,
4091
                                   errors.ECODE_STATE)
4092

    
4093
  def Exec(self, feedback_fn):
4094
    """Execute OOB and return result if we expect any.
4095

4096
    """
4097
    master_node = self.master_node
4098
    ret = []
4099

    
4100
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4101
                                              key=lambda node: node.name)):
4102
      node_entry = [(constants.RS_NORMAL, node.name)]
4103
      ret.append(node_entry)
4104

    
4105
      oob_program = _SupportsOob(self.cfg, node)
4106

    
4107
      if not oob_program:
4108
        node_entry.append((constants.RS_UNAVAIL, None))
4109
        continue
4110

    
4111
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4112
                   self.op.command, oob_program, node.name)
4113
      result = self.rpc.call_run_oob(master_node, oob_program,
4114
                                     self.op.command, node.name,
4115
                                     self.op.timeout)
4116

    
4117
      if result.fail_msg:
4118
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4119
                        node.name, result.fail_msg)
4120
        node_entry.append((constants.RS_NODATA, None))
4121
      else:
4122
        try:
4123
          self._CheckPayload(result)
4124
        except errors.OpExecError, err:
4125
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4126
                          node.name, err)
4127
          node_entry.append((constants.RS_NODATA, None))
4128
        else:
4129
          if self.op.command == constants.OOB_HEALTH:
4130
            # For health we should log important events
4131
            for item, status in result.payload:
4132
              if status in [constants.OOB_STATUS_WARNING,
4133
                            constants.OOB_STATUS_CRITICAL]:
4134
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4135
                                item, node.name, status)
4136

    
4137
          if self.op.command == constants.OOB_POWER_ON:
4138
            node.powered = True
4139
          elif self.op.command == constants.OOB_POWER_OFF:
4140
            node.powered = False
4141
          elif self.op.command == constants.OOB_POWER_STATUS:
4142
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4143
            if powered != node.powered:
4144
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4145
                               " match actual power state (%s)"), node.powered,
4146
                              node.name, powered)
4147

    
4148
          # For configuration changing commands we should update the node
4149
          if self.op.command in (constants.OOB_POWER_ON,
4150
                                 constants.OOB_POWER_OFF):
4151
            self.cfg.Update(node, feedback_fn)
4152

    
4153
          node_entry.append((constants.RS_NORMAL, result.payload))
4154

    
4155
          if (self.op.command == constants.OOB_POWER_ON and
4156
              idx < len(self.nodes) - 1):
4157
            time.sleep(self.op.power_delay)
4158

    
4159
    return ret
4160

    
4161
  def _CheckPayload(self, result):
4162
    """Checks if the payload is valid.
4163

4164
    @param result: RPC result
4165
    @raises errors.OpExecError: If payload is not valid
4166

4167
    """
4168
    errs = []
4169
    if self.op.command == constants.OOB_HEALTH:
4170
      if not isinstance(result.payload, list):
4171
        errs.append("command 'health' is expected to return a list but got %s" %
4172
                    type(result.payload))
4173
      else:
4174
        for item, status in result.payload:
4175
          if status not in constants.OOB_STATUSES:
4176
            errs.append("health item '%s' has invalid status '%s'" %
4177
                        (item, status))
4178

    
4179
    if self.op.command == constants.OOB_POWER_STATUS:
4180
      if not isinstance(result.payload, dict):
4181
        errs.append("power-status is expected to return a dict but got %s" %
4182
                    type(result.payload))
4183

    
4184
    if self.op.command in [
4185
        constants.OOB_POWER_ON,
4186
        constants.OOB_POWER_OFF,
4187
        constants.OOB_POWER_CYCLE,
4188
        ]:
4189
      if result.payload is not None:
4190
        errs.append("%s is expected to not return payload but got '%s'" %
4191
                    (self.op.command, result.payload))
4192

    
4193
    if errs:
4194
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4195
                               utils.CommaJoin(errs))
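    # (Illustration of a well-formed "health" payload, item names purely
    #  hypothetical: [["PSU1", constants.OOB_STATUS_WARNING],
    #  ["FAN2", constants.OOB_STATUS_CRITICAL]], i.e. a list of (item, status)
    #  pairs whose statuses must come from constants.OOB_STATUSES.)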
4196

    
4197

    
4198
class _OsQuery(_QueryBase):
4199
  FIELDS = query.OS_FIELDS
4200

    
4201
  def ExpandNames(self, lu):
4202
    # Lock all nodes in shared mode
4203
    # Temporary removal of locks, should be reverted later
4204
    # TODO: reintroduce locks when they are lighter-weight
4205
    lu.needed_locks = {}
4206
    #self.share_locks[locking.LEVEL_NODE] = 1
4207
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4208

    
4209
    # The following variables interact with _QueryBase._GetNames
4210
    if self.names:
4211
      self.wanted = self.names
4212
    else:
4213
      self.wanted = locking.ALL_SET
4214

    
4215
    self.do_locking = self.use_locking
4216

    
4217
  def DeclareLocks(self, lu, level):
4218
    pass
4219

    
4220
  @staticmethod
4221
  def _DiagnoseByOS(rlist):
4222
    """Remaps a per-node return list into an a per-os per-node dictionary
4223

4224
    @param rlist: a map with node names as keys and OS objects as values
4225

4226
    @rtype: dict
4227
    @return: a dictionary with osnames as keys and as value another
4228
        map, with nodes as keys and tuples of (path, status, diagnose,
4229
        variants, parameters, api_versions) as values, eg::
4230

4231
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4232
                                     (/srv/..., False, "invalid api")],
4233
                           "node2": [(/srv/..., True, "", [], [])]}
4234
          }
4235

4236
    """
4237
    all_os = {}
4238
    # we build here the list of nodes that didn't fail the RPC (at RPC
4239
    # level), so that nodes with a non-responding node daemon don't
4240
    # make all OSes invalid
4241
    good_nodes = [node_name for node_name in rlist
4242
                  if not rlist[node_name].fail_msg]
4243
    for node_name, nr in rlist.items():
4244
      if nr.fail_msg or not nr.payload:
4245
        continue
4246
      for (name, path, status, diagnose, variants,
4247
           params, api_versions) in nr.payload:
4248
        if name not in all_os:
4249
          # build a list of nodes for this os containing empty lists
4250
          # for each node in node_list
4251
          all_os[name] = {}
4252
          for nname in good_nodes:
4253
            all_os[name][nname] = []
4254
        # convert params from [name, help] to (name, help)
4255
        params = [tuple(v) for v in params]
4256
        all_os[name][node_name].append((path, status, diagnose,
4257
                                        variants, params, api_versions))
4258
    return all_os
4259

    
4260
  def _GetQueryData(self, lu):
4261
    """Computes the list of nodes and their attributes.
4262

4263
    """
4264
    # Locking is not used
4265
    assert not (compat.any(lu.glm.is_owned(level)
4266
                           for level in locking.LEVELS
4267
                           if level != locking.LEVEL_CLUSTER) or
4268
                self.do_locking or self.use_locking)
4269

    
4270
    valid_nodes = [node.name
4271
                   for node in lu.cfg.GetAllNodesInfo().values()
4272
                   if not node.offline and node.vm_capable]
4273
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4274
    cluster = lu.cfg.GetClusterInfo()
4275

    
4276
    data = {}
4277

    
4278
    for (os_name, os_data) in pol.items():
4279
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4280
                          hidden=(os_name in cluster.hidden_os),
4281
                          blacklisted=(os_name in cluster.blacklisted_os))
4282

    
4283
      variants = set()
4284
      parameters = set()
4285
      api_versions = set()
4286

    
4287
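      # An OS is valid only if every queried node reports it and the first
      # entry on each node has a good status; variants, parameters and API
      # versions are reduced to the intersection of what all nodes report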
      for idx, osl in enumerate(os_data.values()):
4288
        info.valid = bool(info.valid and osl and osl[0][1])
4289
        if not info.valid:
4290
          break
4291

    
4292
        (node_variants, node_params, node_api) = osl[0][3:6]
4293
        if idx == 0:
4294
          # First entry
4295
          variants.update(node_variants)
4296
          parameters.update(node_params)
4297
          api_versions.update(node_api)
4298
        else:
4299
          # Filter out inconsistent values
4300
          variants.intersection_update(node_variants)
4301
          parameters.intersection_update(node_params)
4302
          api_versions.intersection_update(node_api)
4303

    
4304
      info.variants = list(variants)
4305
      info.parameters = list(parameters)
4306
      info.api_versions = list(api_versions)
4307

    
4308
      data[os_name] = info
4309

    
4310
    # Prepare data in requested order
4311
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4312
            if name in data]
4313

    
4314

    
4315
class LUOsDiagnose(NoHooksLU):
4316
  """Logical unit for OS diagnose/query.
4317

4318
  """
4319
  REQ_BGL = False
4320

    
4321
  @staticmethod
4322
  def _BuildFilter(fields, names):
4323
    """Builds a filter for querying OSes.
4324

4325
    """
4326
    name_filter = qlang.MakeSimpleFilter("name", names)
4327

    
4328
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4329
    # respective field is not requested
4330
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4331
                     for fname in ["hidden", "blacklisted"]
4332
                     if fname not in fields]
4333
    if "valid" not in fields:
4334
      status_filter.append([qlang.OP_TRUE, "valid"])
4335

    
4336
    if status_filter:
4337
      status_filter.insert(0, qlang.OP_AND)
4338
    else:
4339
      status_filter = None
4340

    
4341
    if name_filter and status_filter:
4342
      return [qlang.OP_AND, name_filter, status_filter]
4343
    elif name_filter:
4344
      return name_filter
4345
    else:
4346
      return status_filter
4347

    
4348
  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
4357

    
4358

    
4359
class LUNodeRemove(LogicalUnit):
4360
  """Logical unit for removing a node.
4361

4362
  """
4363
  HPATH = "node-remove"
4364
  HTYPE = constants.HTYPE_NODE
4365

    
4366
  def BuildHooksEnv(self):
4367
    """Build hooks env.
4368

4369
    This doesn't run on the target node in the pre phase as a failed
4370
    node would then be impossible to remove.
4371

4372
    """
4373
    return {
4374
      "OP_TARGET": self.op.node_name,
4375
      "NODE_NAME": self.op.node_name,
4376
      }
4377

    
4378
  def BuildHooksNodes(self):
4379
    """Build hooks nodes.
4380

4381
    """
4382
    all_nodes = self.cfg.GetNodeList()
4383
    try:
4384
      all_nodes.remove(self.op.node_name)
4385
    except ValueError:
4386
      logging.warning("Node '%s', which is about to be removed, was not found"
4387
                      " in the list of all nodes", self.op.node_name)
4388
    return (all_nodes, all_nodes)
4389

    
4390
  def CheckPrereq(self):
4391
    """Check prerequisites.
4392

4393
    This checks:
4394
     - the node exists in the configuration
4395
     - it does not have primary or secondary instances
4396
     - it's not the master
4397

4398
    Any errors are signaled by raising errors.OpPrereqError.
4399

4400
    """
4401
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4402
    node = self.cfg.GetNodeInfo(self.op.node_name)
4403
    assert node is not None
4404

    
4405
    masternode = self.cfg.GetMasterNode()
4406
    if node.name == masternode:
4407
      raise errors.OpPrereqError("Node is the master node, failover to another"
4408
                                 " node is required", errors.ECODE_INVAL)
4409

    
4410
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4411
      if node.name in instance.all_nodes:
4412
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4413
                                   " please remove first" % instance_name,
4414
                                   errors.ECODE_INVAL)
4415
    self.op.node_name = node.name
4416
    self.node = node
4417

    
4418
  def Exec(self, feedback_fn):
4419
    """Removes the node from the cluster.
4420

4421
    """
4422
    node = self.node
4423
    logging.info("Stopping the node daemon and removing configs from node %s",
4424
                 node.name)
4425

    
4426
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4427

    
4428
    # Promote nodes to master candidate as needed
4429
    _AdjustCandidatePool(self, exceptions=[node.name])
4430
    self.context.RemoveNode(node.name)
4431

    
4432
    # Run post hooks on the node before it's removed
4433
    _RunPostHook(self, node.name)
4434

    
4435
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4436
    msg = result.fail_msg
4437
    if msg:
4438
      self.LogWarning("Errors encountered on the remote node while leaving"
4439
                      " the cluster: %s", msg)
4440

    
4441
    # Remove node from our /etc/hosts
4442
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4443
      master_node = self.cfg.GetMasterNode()
4444
      result = self.rpc.call_etc_hosts_modify(master_node,
4445
                                              constants.ETC_HOSTS_REMOVE,
4446
                                              node.name, None)
4447
      result.Raise("Can't update hosts file with new host data")
4448
      _RedistributeAncillaryFiles(self)
4449

    
4450

    
4451
class _NodeQuery(_QueryBase):
4452
  FIELDS = query.NODE_FIELDS
4453

    
4454
  def ExpandNames(self, lu):
4455
    lu.needed_locks = {}
4456
    lu.share_locks = _ShareAll()
4457

    
4458
    if self.names:
4459
      self.wanted = _GetWantedNodes(lu, self.names)
4460
    else:
4461
      self.wanted = locking.ALL_SET
4462

    
4463
    self.do_locking = (self.use_locking and
4464
                       query.NQ_LIVE in self.requested_data)
4465

    
4466
    if self.do_locking:
4467
      # If any non-static field is requested we need to lock the nodes
4468
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4469

    
4470
  def DeclareLocks(self, lu, level):
4471
    pass
4472

    
4473
  def _GetQueryData(self, lu):
4474
    """Computes the list of nodes and their attributes.
4475

4476
    """
4477
    all_info = lu.cfg.GetAllNodesInfo()
4478

    
4479
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4480

    
4481
    # Gather data as requested
4482
    if query.NQ_LIVE in self.requested_data:
4483
      # filter out non-vm_capable nodes
4484
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4485

    
4486
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4487
                                        lu.cfg.GetHypervisorType())
4488
      live_data = dict((name, nresult.payload)
4489
                       for (name, nresult) in node_data.items()
4490
                       if not nresult.fail_msg and nresult.payload)
4491
    else:
4492
      live_data = None
4493

    
4494
    if query.NQ_INST in self.requested_data:
4495
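      # Reverse mappings: for every queried node, the set of instance names
      # using it as primary and as secondary node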
      node_to_primary = dict([(name, set()) for name in nodenames])
4496
      node_to_secondary = dict([(name, set()) for name in nodenames])
4497

    
4498
      inst_data = lu.cfg.GetAllInstancesInfo()
4499

    
4500
      for inst in inst_data.values():
4501
        if inst.primary_node in node_to_primary:
4502
          node_to_primary[inst.primary_node].add(inst.name)
4503
        for secnode in inst.secondary_nodes:
4504
          if secnode in node_to_secondary:
4505
            node_to_secondary[secnode].add(inst.name)
4506
    else:
4507
      node_to_primary = None
4508
      node_to_secondary = None
4509

    
4510
    if query.NQ_OOB in self.requested_data:
4511
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4512
                         for name, node in all_info.iteritems())
4513
    else:
4514
      oob_support = None
4515

    
4516
    if query.NQ_GROUP in self.requested_data:
4517
      groups = lu.cfg.GetAllNodeGroupsInfo()
4518
    else:
4519
      groups = {}
4520

    
4521
    return query.NodeQueryData([all_info[name] for name in nodenames],
4522
                               live_data, lu.cfg.GetMasterNode(),
4523
                               node_to_primary, node_to_secondary, groups,
4524
                               oob_support, lu.cfg.GetClusterInfo())
4525

    
4526

    
4527
class LUNodeQuery(NoHooksLU):
4528
  """Logical unit for querying nodes.
4529

4530
  """
4531
  # pylint: disable=W0142
4532
  REQ_BGL = False
4533

    
4534
  def CheckArguments(self):
4535
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4536
                         self.op.output_fields, self.op.use_locking)
4537

    
4538
  def ExpandNames(self):
4539
    self.nq.ExpandNames(self)
4540

    
4541
  def Exec(self, feedback_fn):
4542
    return self.nq.OldStyleQuery(self)
4543

    
4544

    
4545
class LUNodeQueryvols(NoHooksLU):
4546
  """Logical unit for getting volumes on node(s).
4547

4548
  """
4549
  REQ_BGL = False
4550
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4551
  _FIELDS_STATIC = utils.FieldSet("node")
4552

    
4553
  def CheckArguments(self):
4554
    _CheckOutputFields(static=self._FIELDS_STATIC,
4555
                       dynamic=self._FIELDS_DYNAMIC,
4556
                       selected=self.op.output_fields)
4557

    
4558
  def ExpandNames(self):
4559
    self.needed_locks = {}
4560
    self.share_locks[locking.LEVEL_NODE] = 1
4561
    if not self.op.nodes:
4562
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4563
    else:
4564
      self.needed_locks[locking.LEVEL_NODE] = \
4565
        _GetWantedNodes(self, self.op.nodes)
4566

    
4567
  def Exec(self, feedback_fn):
4568
    """Computes the list of nodes and their attributes.
4569

4570
    """
4571
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4572
    volumes = self.rpc.call_node_volumes(nodenames)
4573

    
4574
    ilist = self.cfg.GetAllInstancesInfo()
4575
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
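    # vol2inst maps (node name, "<vg>/<lv name>") pairs to the name of the
    # owning instance; volumes not used by any instance show up as "-" below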
4576

    
4577
    output = []
4578
    for node in nodenames:
4579
      nresult = volumes[node]
4580
      if nresult.offline:
4581
        continue
4582
      msg = nresult.fail_msg
4583
      if msg:
4584
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4585
        continue
4586

    
4587
      node_vols = sorted(nresult.payload,
4588
                         key=operator.itemgetter("dev"))
4589

    
4590
      for vol in node_vols:
4591
        node_output = []
4592
        for field in self.op.output_fields:
4593
          if field == "node":
4594
            val = node
4595
          elif field == "phys":
4596
            val = vol["dev"]
4597
          elif field == "vg":
4598
            val = vol["vg"]
4599
          elif field == "name":
4600
            val = vol["name"]
4601
          elif field == "size":
4602
            val = int(float(vol["size"]))
4603
          elif field == "instance":
4604
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4605
          else:
4606
            raise errors.ParameterError(field)
4607
          node_output.append(str(val))
4608

    
4609
        output.append(node_output)
4610

    
4611
    return output
4612

    
4613

    
4614
class LUNodeQueryStorage(NoHooksLU):
4615
  """Logical unit for getting information on storage units on node(s).
4616

4617
  """
4618
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4619
  REQ_BGL = False
4620

    
4621
  def CheckArguments(self):
4622
    _CheckOutputFields(static=self._FIELDS_STATIC,
4623
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4624
                       selected=self.op.output_fields)
4625

    
4626
  def ExpandNames(self):
4627
    self.needed_locks = {}
4628
    self.share_locks[locking.LEVEL_NODE] = 1
4629

    
4630
    if self.op.nodes:
4631
      self.needed_locks[locking.LEVEL_NODE] = \
4632
        _GetWantedNodes(self, self.op.nodes)
4633
    else:
4634
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4635

    
4636
  def Exec(self, feedback_fn):
4637
    """Computes the list of nodes and their attributes.
4638

4639
    """
4640
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4641

    
4642
    # Always get name to sort by
4643
    if constants.SF_NAME in self.op.output_fields:
4644
      fields = self.op.output_fields[:]
4645
    else:
4646
      fields = [constants.SF_NAME] + self.op.output_fields
4647

    
4648
    # Never ask for node or type as it's only known to the LU
4649
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4650
      while extra in fields:
4651
        fields.remove(extra)
4652

    
4653
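    # Map each field requested from the nodes to its column index in the
    # result rows; SF_NAME is always included and used for sorting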
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4654
    name_idx = field_idx[constants.SF_NAME]
4655

    
4656
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4657
    data = self.rpc.call_storage_list(self.nodes,
4658
                                      self.op.storage_type, st_args,
4659
                                      self.op.name, fields)
4660

    
4661
    result = []
4662

    
4663
    for node in utils.NiceSort(self.nodes):
4664
      nresult = data[node]
4665
      if nresult.offline:
4666
        continue
4667

    
4668
      msg = nresult.fail_msg
4669
      if msg:
4670
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4671
        continue
4672

    
4673
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4674

    
4675
      for name in utils.NiceSort(rows.keys()):
4676
        row = rows[name]
4677

    
4678
        out = []
4679

    
4680
        for field in self.op.output_fields:
4681
          if field == constants.SF_NODE:
4682
            val = node
4683
          elif field == constants.SF_TYPE:
4684
            val = self.op.storage_type
4685
          elif field in field_idx:
4686
            val = row[field_idx[field]]
4687
          else:
4688
            raise errors.ParameterError(field)
4689

    
4690
          out.append(val)
4691

    
4692
        result.append(out)
4693

    
4694
    return result
4695

    
4696

    
4697
class _InstanceQuery(_QueryBase):
4698
  FIELDS = query.INSTANCE_FIELDS
4699

    
4700
  def ExpandNames(self, lu):
4701
    lu.needed_locks = {}
4702
    lu.share_locks = _ShareAll()
4703

    
4704
    if self.names:
4705
      self.wanted = _GetWantedInstances(lu, self.names)
4706
    else:
4707
      self.wanted = locking.ALL_SET
4708

    
4709
    self.do_locking = (self.use_locking and
4710
                       query.IQ_LIVE in self.requested_data)
4711
    if self.do_locking:
4712
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4713
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4714
      lu.needed_locks[locking.LEVEL_NODE] = []
4715
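      # The actual node locks are only known once the instance locks have
      # been acquired; they are recalculated in DeclareLocks via
      # _LockInstancesNodes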
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4716

    
4717
    self.do_grouplocks = (self.do_locking and
4718
                          query.IQ_NODES in self.requested_data)
4719

    
4720
  def DeclareLocks(self, lu, level):
4721
    if self.do_locking:
4722
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4723
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4724

    
4725
        # Lock all groups used by instances optimistically; this requires going
4726
        # via the node before it's locked, requiring verification later on
4727
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4728
          set(group_uuid
4729
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4730
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4731
      elif level == locking.LEVEL_NODE:
4732
        lu._LockInstancesNodes() # pylint: disable=W0212
4733

    
4734
  @staticmethod
4735
  def _CheckGroupLocks(lu):
4736
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4737
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4738

    
4739
    # Check if node groups for locked instances are still correct
4740
    for instance_name in owned_instances:
4741
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4742

    
4743
  def _GetQueryData(self, lu):
4744
    """Computes the list of instances and their attributes.
4745

4746
    """
4747
    if self.do_grouplocks:
4748
      self._CheckGroupLocks(lu)
4749

    
4750
    cluster = lu.cfg.GetClusterInfo()
4751
    all_info = lu.cfg.GetAllInstancesInfo()
4752

    
4753
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4754

    
4755
    instance_list = [all_info[name] for name in instance_names]
4756
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4757
                                        for inst in instance_list)))
4758
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4759
    bad_nodes = []
4760
    offline_nodes = []
4761
    wrongnode_inst = set()
4762

    
4763
    # Gather data as requested
4764
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4765
      live_data = {}
4766
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4767
      for name in nodes:
4768
        result = node_data[name]
4769
        if result.offline:
4770
          # offline nodes will be in both lists
4771
          assert result.fail_msg
4772
          offline_nodes.append(name)
4773
        if result.fail_msg:
4774
          bad_nodes.append(name)
4775
        elif result.payload:
4776
          for inst in result.payload:
4777
            if inst in all_info:
4778
              if all_info[inst].primary_node == name:
4779
                live_data.update(result.payload)
4780
              else:
4781
                wrongnode_inst.add(inst)
4782
            else:
4783
              # orphan instance; we don't list it here as we don't
4784
              # handle this case yet in the output of instance listing
4785
              logging.warning("Orphan instance '%s' found on node %s",
4786
                              inst, name)
4787
        # else no instance is alive
4788
    else:
4789
      live_data = {}
4790

    
4791
    if query.IQ_DISKUSAGE in self.requested_data:
4792
      disk_usage = dict((inst.name,
4793
                         _ComputeDiskSize(inst.disk_template,
4794
                                          [{constants.IDISK_SIZE: disk.size}
4795
                                           for disk in inst.disks]))
4796
                        for inst in instance_list)
4797
    else:
4798
      disk_usage = None
4799

    
4800
    if query.IQ_CONSOLE in self.requested_data:
4801
      consinfo = {}
4802
      for inst in instance_list:
4803
        if inst.name in live_data:
4804
          # Instance is running
4805
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4806
        else:
4807
          consinfo[inst.name] = None
4808
      assert set(consinfo.keys()) == set(instance_names)
4809
    else:
4810
      consinfo = None
4811

    
4812
    if query.IQ_NODES in self.requested_data:
4813
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4814
                                            instance_list)))
4815
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4816
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4817
                    for uuid in set(map(operator.attrgetter("group"),
4818
                                        nodes.values())))
4819
    else:
4820
      nodes = None
4821
      groups = None
4822

    
4823
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4824
                                   disk_usage, offline_nodes, bad_nodes,
4825
                                   live_data, wrongnode_inst, consinfo,
4826
                                   nodes, groups)
4827

    
4828

    
4829
class LUQuery(NoHooksLU):
4830
  """Query for resources/items of a certain kind.
4831

4832
  """
4833
  # pylint: disable=W0142
4834
  REQ_BGL = False
4835

    
4836
  def CheckArguments(self):
4837
    qcls = _GetQueryImplementation(self.op.what)
4838

    
4839
    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4840

    
4841
  def ExpandNames(self):
4842
    self.impl.ExpandNames(self)
4843

    
4844
  def DeclareLocks(self, level):
4845
    self.impl.DeclareLocks(self, level)
4846

    
4847
  def Exec(self, feedback_fn):
4848
    return self.impl.NewStyleQuery(self)
4849

    
4850

    
4851
class LUQueryFields(NoHooksLU):
4852
  """Query for resources/items of a certain kind.
4853

4854
  """
4855
  # pylint: disable=W0142
4856
  REQ_BGL = False
4857

    
4858
  def CheckArguments(self):
4859
    self.qcls = _GetQueryImplementation(self.op.what)
4860

    
4861
  def ExpandNames(self):
4862
    self.needed_locks = {}
4863

    
4864
  def Exec(self, feedback_fn):
4865
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4866

    
4867

    
4868
class LUNodeModifyStorage(NoHooksLU):
4869
  """Logical unit for modifying a storage volume on a node.
4870

4871
  """
4872
  REQ_BGL = False
4873

    
4874
  def CheckArguments(self):
4875
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4876

    
4877
    storage_type = self.op.storage_type
4878

    
4879
    try:
4880
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4881
    except KeyError:
4882
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4883
                                 " modified" % storage_type,
4884
                                 errors.ECODE_INVAL)
4885

    
4886
    diff = set(self.op.changes.keys()) - modifiable
4887
    if diff:
4888
      raise errors.OpPrereqError("The following fields can not be modified for"
4889
                                 " storage units of type '%s': %r" %
4890
                                 (storage_type, list(diff)),
4891
                                 errors.ECODE_INVAL)
4892

    
4893
  def ExpandNames(self):
4894
    self.needed_locks = {
4895
      locking.LEVEL_NODE: self.op.node_name,
4896
      }
4897

    
4898
  def Exec(self, feedback_fn):
4899
    """Computes the list of nodes and their attributes.
4900

4901
    """
4902
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4903
    result = self.rpc.call_storage_modify(self.op.node_name,
4904
                                          self.op.storage_type, st_args,
4905
                                          self.op.name, self.op.changes)
4906
    result.Raise("Failed to modify storage unit '%s' on %s" %
4907
                 (self.op.name, self.op.node_name))
4908

    
4909

    
4910
class LUNodeAdd(LogicalUnit):
4911
  """Logical unit for adding node to the cluster.
4912

4913
  """
4914
  HPATH = "node-add"
4915
  HTYPE = constants.HTYPE_NODE
4916
  _NFLAGS = ["master_capable", "vm_capable"]
4917

    
4918
  def CheckArguments(self):
4919
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4920
    # validate/normalize the node name
4921
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4922
                                         family=self.primary_ip_family)
4923
    self.op.node_name = self.hostname.name
4924

    
4925
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4926
      raise errors.OpPrereqError("Cannot readd the master node",
4927
                                 errors.ECODE_STATE)
4928

    
4929
    if self.op.readd and self.op.group:
4930
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4931
                                 " being readded", errors.ECODE_INVAL)
4932

    
4933
  def BuildHooksEnv(self):
4934
    """Build hooks env.
4935

4936
    This will run on all nodes before, and on all nodes + the new node after.
4937

4938
    """
4939
    return {
4940
      "OP_TARGET": self.op.node_name,
4941
      "NODE_NAME": self.op.node_name,
4942
      "NODE_PIP": self.op.primary_ip,
4943
      "NODE_SIP": self.op.secondary_ip,
4944
      "MASTER_CAPABLE": str(self.op.master_capable),
4945
      "VM_CAPABLE": str(self.op.vm_capable),
4946
      }
4947

    
4948
  def BuildHooksNodes(self):
4949
    """Build hooks nodes.
4950

4951
    """
4952
    # Exclude added node
4953
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4954
    post_nodes = pre_nodes + [self.op.node_name, ]
4955

    
4956
    return (pre_nodes, post_nodes)
4957

    
4958
  def CheckPrereq(self):
4959
    """Check prerequisites.
4960

4961
    This checks:
4962
     - the new node is not already in the config
4963
     - it is resolvable
4964
     - its parameters (single/dual homed) matches the cluster
4965

4966
    Any errors are signaled by raising errors.OpPrereqError.
4967

4968
    """
4969
    cfg = self.cfg
4970
    hostname = self.hostname
4971
    node = hostname.name
4972
    primary_ip = self.op.primary_ip = hostname.ip
4973
    if self.op.secondary_ip is None:
4974
      if self.primary_ip_family == netutils.IP6Address.family:
4975
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4976
                                   " IPv4 address must be given as secondary",
4977
                                   errors.ECODE_INVAL)
4978
      self.op.secondary_ip = primary_ip
4979

    
4980
    secondary_ip = self.op.secondary_ip
4981
    if not netutils.IP4Address.IsValid(secondary_ip):
4982
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4983
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4984

    
4985
    node_list = cfg.GetNodeList()
4986
    if not self.op.readd and node in node_list:
4987
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4988
                                 node, errors.ECODE_EXISTS)
4989
    elif self.op.readd and node not in node_list:
4990
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4991
                                 errors.ECODE_NOENT)
4992

    
4993
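    # When readding a node, remember whether its primary IP changed so that
    # Exec can update the configuration accordingly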
    self.changed_primary_ip = False
4994

    
4995
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4996
      if self.op.readd and node == existing_node_name:
4997
        if existing_node.secondary_ip != secondary_ip:
4998
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4999
                                     " address configuration as before",
5000
                                     errors.ECODE_INVAL)
5001
        if existing_node.primary_ip != primary_ip:
5002
          self.changed_primary_ip = True
5003

    
5004
        continue
5005

    
5006
      if (existing_node.primary_ip == primary_ip or
5007
          existing_node.secondary_ip == primary_ip or
5008
          existing_node.primary_ip == secondary_ip or
5009
          existing_node.secondary_ip == secondary_ip):
5010
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5011
                                   " existing node %s" % existing_node.name,
5012
                                   errors.ECODE_NOTUNIQUE)
5013

    
5014
    # After this 'if' block, None is no longer a valid value for the
5015
    # _capable op attributes
5016
    if self.op.readd:
5017
      old_node = self.cfg.GetNodeInfo(node)
5018
      assert old_node is not None, "Can't retrieve locked node %s" % node
5019
      for attr in self._NFLAGS:
5020
        if getattr(self.op, attr) is None:
5021
          setattr(self.op, attr, getattr(old_node, attr))
5022
    else:
5023
      for attr in self._NFLAGS:
5024
        if getattr(self.op, attr) is None:
5025
          setattr(self.op, attr, True)
5026

    
5027
    if self.op.readd and not self.op.vm_capable:
5028
      pri, sec = cfg.GetNodeInstances(node)
5029
      if pri or sec:
5030
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5031
                                   " flag set to false, but it already holds"
5032
                                   " instances" % node,
5033
                                   errors.ECODE_STATE)
5034

    
5035
    # check that the type of the node (single versus dual homed) is the
5036
    # same as for the master
5037
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5038
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5039
    newbie_singlehomed = secondary_ip == primary_ip
5040
    if master_singlehomed != newbie_singlehomed:
5041
      if master_singlehomed:
5042
        raise errors.OpPrereqError("The master has no secondary ip but the"
5043
                                   " new node has one",
5044
                                   errors.ECODE_INVAL)
5045
      else:
5046
        raise errors.OpPrereqError("The master has a secondary ip but the"
5047
                                   " new node doesn't have one",
5048
                                   errors.ECODE_INVAL)
5049

    
5050
    # checks reachability
5051
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5052
      raise errors.OpPrereqError("Node not reachable by ping",
5053
                                 errors.ECODE_ENVIRON)
5054

    
5055
    if not newbie_singlehomed:
5056
      # check reachability from my secondary ip to newbie's secondary ip
5057
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5058
                              source=myself.secondary_ip):
5059
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5060
                                   " based ping to node daemon port",
5061
                                   errors.ECODE_ENVIRON)
5062

    
5063
    if self.op.readd:
5064
      exceptions = [node]
5065
    else:
5066
      exceptions = []
5067

    
5068
    if self.op.master_capable:
5069
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5070
    else:
5071
      self.master_candidate = False
5072

    
5073
    if self.op.readd:
5074
      self.new_node = old_node
5075
    else:
5076
      node_group = cfg.LookupNodeGroup(self.op.group)
5077
      self.new_node = objects.Node(name=node,
5078
                                   primary_ip=primary_ip,
5079
                                   secondary_ip=secondary_ip,
5080
                                   master_candidate=self.master_candidate,
5081
                                   offline=False, drained=False,
5082
                                   group=node_group)
5083

    
5084
    if self.op.ndparams:
5085
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5086

    
5087
    # check connectivity
5088
    result = self.rpc.call_version([self.new_node.name])[self.new_node.name]
5089
    result.Raise("Can't get version information from node %s" % node)
5090
    if constants.PROTOCOL_VERSION == result.payload:
5091
      logging.info("Communication to node %s fine, sw version %s match",
5092
                   node, result.payload)
5093
    else:
5094
      raise errors.OpPrereqError("Version mismatch master version %s,"
5095
                                 " node version %s" %
5096
                                 (constants.PROTOCOL_VERSION, result.payload),
5097
                                 errors.ECODE_ENVIRON)
5098

    
5099
  def Exec(self, feedback_fn):
5100
    """Adds the new node to the cluster.
5101

5102
    """
5103
    new_node = self.new_node
5104
    node = new_node.name
5105

    
5106
    # We are adding a new node, so we assume it is powered
5107
    new_node.powered = True
5108

    
5109
    # for re-adds, reset the offline/drained/master-candidate flags;
5110
    # we need to reset here, otherwise offline would prevent RPC calls
5111
    # later in the procedure; this also means that if the re-add
5112
    # fails, we are left with a non-offlined, broken node
5113
    if self.op.readd:
5114
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5115
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5116
      # if we demote the node, we do cleanup later in the procedure
5117
      new_node.master_candidate = self.master_candidate
5118
      if self.changed_primary_ip:
5119
        new_node.primary_ip = self.op.primary_ip
5120

    
5121
    # copy the master/vm_capable flags
5122
    for attr in self._NFLAGS:
5123
      setattr(new_node, attr, getattr(self.op, attr))
5124

    
5125
    # notify the user about any possible mc promotion
5126
    if new_node.master_candidate:
5127
      self.LogInfo("Node will be a master candidate")
5128

    
5129
    if self.op.ndparams:
5130
      new_node.ndparams = self.op.ndparams
5131
    else:
5132
      new_node.ndparams = {}
5133

    
5134
    # Add node to our /etc/hosts, and add key to known_hosts
5135
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5136
      master_node = self.cfg.GetMasterNode()
5137
      result = self.rpc.call_etc_hosts_modify(master_node,
5138
                                              constants.ETC_HOSTS_ADD,
5139
                                              self.hostname.name,
5140
                                              self.hostname.ip)
5141
      result.Raise("Can't update hosts file with new host data")
5142

    
5143
    if new_node.secondary_ip != new_node.primary_ip:
5144
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5145
                               False)
5146

    
5147
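    # Have the master node verify that it can reach the new node (ssh and
    # hostname checks via NV_NODELIST) before finishing the add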
    node_verify_list = [self.cfg.GetMasterNode()]
5148
    node_verify_param = {
5149
      constants.NV_NODELIST: ([node], {}),
5150
      # TODO: do a node-net-test as well?
5151
    }
5152

    
5153
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5154
                                       self.cfg.GetClusterName())
5155
    for verifier in node_verify_list:
5156
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5157
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5158
      if nl_payload:
5159
        for failed in nl_payload:
5160
          feedback_fn("ssh/hostname verification failed"
5161
                      " (checking from %s): %s" %
5162
                      (verifier, nl_payload[failed]))
5163
        raise errors.OpExecError("ssh/hostname verification failed")
5164

    
5165
    if self.op.readd:
5166
      _RedistributeAncillaryFiles(self)
5167
      self.context.ReaddNode(new_node)
5168
      # make sure we redistribute the config
5169
      self.cfg.Update(new_node, feedback_fn)
5170
      # and make sure the new node will not have old files around
5171
      if not new_node.master_candidate:
5172
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5173
        msg = result.fail_msg
5174
        if msg:
5175
          self.LogWarning("Node failed to demote itself from master"
5176
                          " candidate status: %s" % msg)
5177
    else:
5178
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5179
                                  additional_vm=self.op.vm_capable)
5180
      self.context.AddNode(new_node, self.proc.GetECId())
5181

    
5182

    
5183
class LUNodeSetParams(LogicalUnit):
5184
  """Modifies the parameters of a node.
5185

5186
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5187
      to the node role (as _ROLE_*)
5188
  @cvar _R2F: a dictionary from node role to tuples of flags
5189
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5190

5191
  """
5192
  HPATH = "node-modify"
5193
  HTYPE = constants.HTYPE_NODE
5194
  REQ_BGL = False
5195
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5196
  _F2R = {
5197
    (True, False, False): _ROLE_CANDIDATE,
5198
    (False, True, False): _ROLE_DRAINED,
5199
    (False, False, True): _ROLE_OFFLINE,
5200
    (False, False, False): _ROLE_REGULAR,
5201
    }
5202
  _R2F = dict((v, k) for k, v in _F2R.items())
5203
  _FLAGS = ["master_candidate", "drained", "offline"]
5204

    
5205
  def CheckArguments(self):
5206
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5207
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5208
                self.op.master_capable, self.op.vm_capable,
5209
                self.op.secondary_ip, self.op.ndparams]
5210
    if all_mods.count(None) == len(all_mods):
5211
      raise errors.OpPrereqError("Please pass at least one modification",
5212
                                 errors.ECODE_INVAL)
5213
    if all_mods.count(True) > 1:
5214
      raise errors.OpPrereqError("Can't set the node into more than one"
5215
                                 " state at the same time",
5216
                                 errors.ECODE_INVAL)
5217

    
5218
    # Boolean value that tells us whether we might be demoting from MC
5219
    self.might_demote = (self.op.master_candidate == False or
5220
                         self.op.offline == True or
5221
                         self.op.drained == True or
5222
                         self.op.master_capable == False)
5223

    
5224
    if self.op.secondary_ip:
5225
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5226
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5227
                                   " address" % self.op.secondary_ip,
5228
                                   errors.ECODE_INVAL)
5229

    
5230
    self.lock_all = self.op.auto_promote and self.might_demote
5231
    self.lock_instances = self.op.secondary_ip is not None
5232

    
5233
  def ExpandNames(self):
5234
    if self.lock_all:
5235
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5236
    else:
5237
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5238

    
5239
    if self.lock_instances:
5240
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5241

    
5242
  def DeclareLocks(self, level):
5243
    # If we have locked all instances, before waiting to lock nodes, release
5244
    # all the ones living on nodes unrelated to the current operation.
5245
    if level == locking.LEVEL_NODE and self.lock_instances:
5246
      self.affected_instances = []
5247
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5248
        instances_keep = []
5249

    
5250
        # Build list of instances to release
5251
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5252
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5253
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5254
              self.op.node_name in instance.all_nodes):
5255
            instances_keep.append(instance_name)
5256
            self.affected_instances.append(instance)
5257

    
5258
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5259

    
5260
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5261
                set(instances_keep))
5262

    
5263
  def BuildHooksEnv(self):
5264
    """Build hooks env.
5265

5266
    This runs on the master node.
5267

5268
    """
5269
    return {
5270
      "OP_TARGET": self.op.node_name,
5271
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5272
      "OFFLINE": str(self.op.offline),
5273
      "DRAINED": str(self.op.drained),
5274
      "MASTER_CAPABLE": str(self.op.master_capable),
5275
      "VM_CAPABLE": str(self.op.vm_capable),
5276
      }
5277

    
5278
  def BuildHooksNodes(self):
5279
    """Build hooks nodes.
5280

5281
    """
5282
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5283
    return (nl, nl)
5284

    
5285
  def CheckPrereq(self):
5286
    """Check prerequisites.
5287

5288
    This only checks the instance list against the existing names.
5289

5290
    """
5291
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5292

    
5293
    if (self.op.master_candidate is not None or
5294
        self.op.drained is not None or
5295
        self.op.offline is not None):
5296
      # we can't change the master's node flags
5297
      if self.op.node_name == self.cfg.GetMasterNode():
5298
        raise errors.OpPrereqError("The master role can be changed"
5299
                                   " only via master-failover",
5300
                                   errors.ECODE_INVAL)
5301

    
5302
    if self.op.master_candidate and not node.master_capable:
5303
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5304
                                 " it a master candidate" % node.name,
5305
                                 errors.ECODE_STATE)
5306

    
5307
    if self.op.vm_capable == False:
5308
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5309
      if ipri or isec:
5310
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5311
                                   " the vm_capable flag" % node.name,
5312
                                   errors.ECODE_STATE)
5313

    
5314
    if node.master_candidate and self.might_demote and not self.lock_all:
5315
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5316
      # check if after removing the current node, we're missing master
5317
      # candidates
5318
      (mc_remaining, mc_should, _) = \
5319
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5320
      if mc_remaining < mc_should:
5321
        raise errors.OpPrereqError("Not enough master candidates, please"
5322
                                   " pass auto promote option to allow"
5323
                                   " promotion", errors.ECODE_STATE)
5324

    
5325
    self.old_flags = old_flags = (node.master_candidate,
5326
                                  node.drained, node.offline)
5327
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5328
    self.old_role = old_role = self._F2R[old_flags]
5329

    
5330
    # Check for ineffective changes
5331
    for attr in self._FLAGS:
5332
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5333
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5334
        setattr(self.op, attr, None)
5335

    
5336
    # Past this point, any flag change to False means a transition
5337
    # away from the respective state, as only real changes are kept
5338

    
5339
    # TODO: We might query the real power state if it supports OOB
5340
    if _SupportsOob(self.cfg, node):
5341
      if self.op.offline is False and not (node.powered or
5342
                                           self.op.powered == True):
5343
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5344
                                    " offline status can be reset") %
5345
                                   self.op.node_name)
5346
    elif self.op.powered is not None:
5347
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5348
                                  " as it does not support out-of-band"
5349
                                  " handling") % self.op.node_name)
5350

    
5351
    # If we're being deofflined/drained, we'll MC ourself if needed
5352
    if (self.op.drained == False or self.op.offline == False or
5353
        (self.op.master_capable and not node.master_capable)):
5354
      if _DecideSelfPromotion(self):
5355
        self.op.master_candidate = True
5356
        self.LogInfo("Auto-promoting node to master candidate")
5357

    
5358
    # If we're no longer master capable, we'll demote ourselves from MC
5359
    if self.op.master_capable == False and node.master_candidate:
5360
      self.LogInfo("Demoting from master candidate")
5361
      self.op.master_candidate = False
5362

    
5363
    # Compute new role
5364
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5365
    if self.op.master_candidate:
5366
      new_role = self._ROLE_CANDIDATE
5367
    elif self.op.drained:
5368
      new_role = self._ROLE_DRAINED
5369
    elif self.op.offline:
5370
      new_role = self._ROLE_OFFLINE
5371
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5372
      # False is still in new flags, which means we're un-setting (the
5373
      # only) True flag
5374
      new_role = self._ROLE_REGULAR
5375
    else: # no new flags, nothing, keep old role
5376
      new_role = old_role
5377

    
5378
    self.new_role = new_role
5379

    
5380
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5381
      # Trying to transition out of offline status
5382
      result = self.rpc.call_version([node.name])[node.name]
5383
      if result.fail_msg:
5384
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5385
                                   " to report its version: %s" %
5386
                                   (node.name, result.fail_msg),
5387
                                   errors.ECODE_STATE)
5388
      else:
5389
        self.LogWarning("Transitioning node from offline to online state"
5390
                        " without using re-add. Please make sure the node"
5391
                        " is healthy!")
5392

    
5393
    if self.op.secondary_ip:
5394
      # Ok even without locking, because this can't be changed by any LU
5395
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5396
      master_singlehomed = master.secondary_ip == master.primary_ip
5397
      if master_singlehomed and self.op.secondary_ip:
5398
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5399
                                   " homed cluster", errors.ECODE_INVAL)
5400

    
5401
      if node.offline:
5402
        if self.affected_instances:
5403
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5404
                                     " node has instances (%s) configured"
5405
                                     " to use it" % self.affected_instances)
5406
      else:
5407
        # On online nodes, check that no instances are running, and that
5408
        # the node has the new ip and we can reach it.
5409
        for instance in self.affected_instances:
5410
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5411

    
5412
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5413
        if master.name != node.name:
5414
          # check reachability from master secondary ip to new secondary ip
5415
          if not netutils.TcpPing(self.op.secondary_ip,
5416
                                  constants.DEFAULT_NODED_PORT,
5417
                                  source=master.secondary_ip):
5418
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5419
                                       " based ping to node daemon port",
5420
                                       errors.ECODE_ENVIRON)
5421

    
5422
    if self.op.ndparams:
5423
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5424
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5425
      self.new_ndparams = new_ndparams
5426

    
5427
  def Exec(self, feedback_fn):
5428
    """Modifies a node.
5429

5430
    """
5431
    node = self.node
5432
    old_role = self.old_role
5433
    new_role = self.new_role
5434

    
5435
    result = []
5436

    
5437
    if self.op.ndparams:
5438
      node.ndparams = self.new_ndparams
5439

    
5440
    if self.op.powered is not None:
5441
      node.powered = self.op.powered
5442

    
5443
    for attr in ["master_capable", "vm_capable"]:
5444
      val = getattr(self.op, attr)
5445
      if val is not None:
5446
        setattr(node, attr, val)
5447
        result.append((attr, str(val)))
5448

    
5449
    if new_role != old_role:
5450
      # Tell the node to demote itself, if no longer MC and not offline
5451
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5452
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5453
        if msg:
5454
          self.LogWarning("Node failed to demote itself: %s", msg)
5455

    
5456
      new_flags = self._R2F[new_role]
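      # Record only the flags that actually changed as (name, value) pairs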
5457
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5458
        if of != nf:
5459
          result.append((desc, str(nf)))
5460
      (node.master_candidate, node.drained, node.offline) = new_flags
5461

    
5462
      # we locked all nodes, we adjust the CP before updating this node
5463
      if self.lock_all:
5464
        _AdjustCandidatePool(self, [node.name])
5465

    
5466
    if self.op.secondary_ip:
5467
      node.secondary_ip = self.op.secondary_ip
5468
      result.append(("secondary_ip", self.op.secondary_ip))
5469

    
5470
    # this will trigger configuration file update, if needed
5471
    self.cfg.Update(node, feedback_fn)
5472

    
5473
    # this will trigger job queue propagation or cleanup if the mc
5474
    # flag changed
5475
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5476
      self.context.ReaddNode(node)
5477

    
5478
    return result
5479

    
5480

    
5481
class LUNodePowercycle(NoHooksLU):
5482
  """Powercycles a node.
5483

5484
  """
5485
  REQ_BGL = False
5486

    
5487
  def CheckArguments(self):
5488
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5489
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5490
      raise errors.OpPrereqError("The node is the master and the force"
5491
                                 " parameter was not set",
5492
                                 errors.ECODE_INVAL)
5493

    
5494
  def ExpandNames(self):
5495
    """Locking for PowercycleNode.
5496

5497
    This is a last-resort option and shouldn't block on other
5498
    jobs. Therefore, we grab no locks.
5499

5500
    """
5501
    self.needed_locks = {}
5502

    
5503
  def Exec(self, feedback_fn):
5504
    """Reboots a node.
5505

5506
    """
5507
    result = self.rpc.call_node_powercycle(self.op.node_name,
5508
                                           self.cfg.GetHypervisorType())
5509
    result.Raise("Failed to schedule the reboot")
5510
    return result.payload
5511

    
5512

    
5513
class LUClusterQuery(NoHooksLU):
5514
  """Query cluster configuration.
5515

5516
  """
5517
  REQ_BGL = False
5518

    
5519
  def ExpandNames(self):
5520
    self.needed_locks = {}
5521

    
5522
  def Exec(self, feedback_fn):
5523
    """Return cluster config.
5524

5525
    """
5526
    cluster = self.cfg.GetClusterInfo()
5527
    os_hvp = {}
5528

    
5529
    # Filter just for enabled hypervisors
5530
    for os_name, hv_dict in cluster.os_hvp.items():
5531
      os_hvp[os_name] = {}
5532
      for hv_name, hv_params in hv_dict.items():
5533
        if hv_name in cluster.enabled_hypervisors:
5534
          os_hvp[os_name][hv_name] = hv_params
5535

    
5536
    # Convert ip_family to ip_version
5537
    primary_ip_version = constants.IP4_VERSION
5538
    if cluster.primary_ip_family == netutils.IP6Address.family:
5539
      primary_ip_version = constants.IP6_VERSION
5540

    
5541
    result = {
5542
      "software_version": constants.RELEASE_VERSION,
5543
      "protocol_version": constants.PROTOCOL_VERSION,
5544
      "config_version": constants.CONFIG_VERSION,
5545
      "os_api_version": max(constants.OS_API_VERSIONS),
5546
      "export_version": constants.EXPORT_VERSION,
5547
      "architecture": runtime.GetArchInfo(),
5548
      "name": cluster.cluster_name,
5549
      "master": cluster.master_node,
5550
      "default_hypervisor": cluster.enabled_hypervisors[0],
5551
      "enabled_hypervisors": cluster.enabled_hypervisors,
5552
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5553
                        for hypervisor_name in cluster.enabled_hypervisors]),
5554
      "os_hvp": os_hvp,
5555
      "beparams": cluster.beparams,
5556
      "osparams": cluster.osparams,
5557
      "nicparams": cluster.nicparams,
5558
      "ndparams": cluster.ndparams,
5559
      "candidate_pool_size": cluster.candidate_pool_size,
5560
      "master_netdev": cluster.master_netdev,
5561
      "volume_group_name": cluster.volume_group_name,
5562
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5563
      "file_storage_dir": cluster.file_storage_dir,
5564
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5565
      "maintain_node_health": cluster.maintain_node_health,
5566
      "ctime": cluster.ctime,
5567
      "mtime": cluster.mtime,
5568
      "uuid": cluster.uuid,
5569
      "tags": list(cluster.GetTags()),
5570
      "uid_pool": cluster.uid_pool,
5571
      "default_iallocator": cluster.default_iallocator,
5572
      "reserved_lvs": cluster.reserved_lvs,
5573
      "primary_ip_version": primary_ip_version,
5574
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5575
      "hidden_os": cluster.hidden_os,
5576
      "blacklisted_os": cluster.blacklisted_os,
5577
      }
5578

    
5579
    return result
5580

    
5581

    
5582
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return the values of the requested configuration fields.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples with the
      mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


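# Illustrative note (not part of the original module): callers such as
# LUInstanceActivateDisks above typically unpack the result as
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
# and treat a False disks_ok as a fatal error, while device_info carries
# (node, iv_name, device_path) tuples for the primary node's devices.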
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is down (and raises an error
  otherwise) before calling _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


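# Illustrative note (not part of the original module): with disks=None the
# helper above simply returns instance.disks, e.g.
#   _ExpandCheckDisks(instance, None) == instance.disks
# while a subset such as [instance.disks[0]] is returned unchanged and a
# disk not belonging to the instance raises ProgrammerError.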
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node only cause a failure return if ignore_primary
  is false; errors on secondary nodes are ignored when the node in question
  is offline.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


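# Illustrative note (not part of the original module): the shutdown helpers
# above are used in two flavours by the LUs in this file; a forced
# deactivation calls _ShutdownInstanceDisks(lu, instance) directly, while
# _SafeShutdownInstanceDisks is preferred when the instance must already be
# stopped, since it first verifies that via _CheckInstanceDown.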
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


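# Illustrative note (not part of the original module): a prerequisite check
# along the lines of the call in LUInstanceStartup below would be, roughly,
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# where bep is the instance's filled beparams dict.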
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


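# Illustrative note (not part of the original module): req_sizes maps volume
# group names to required space in MiB, so a hypothetical check for 10 GiB
# in a VG named "xenvg" on two nodes would look roughly like
#   _CheckNodesFreeDiskPerVG(self,
#                            ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 10240})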
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


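# Illustrative note (not part of the original module): the LU above is driven
# by its corresponding opcode; a client-side submission would presumably look
# roughly like (opcode name and parameter values are assumptions)
#   op = opcodes.OpInstanceStartup(instance_name="inst1.example.com",
#                                  force=False, ignore_offline_nodes=False)
# which the master daemon's opcode dispatch maps to LUInstanceStartup.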
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
7020
  """Migrate all instances from a node.
7021

7022
  """
7023
  HPATH = "node-migrate"
7024
  HTYPE = constants.HTYPE_NODE
7025
  REQ_BGL = False
7026

    
7027
  def CheckArguments(self):
7028
    pass
7029

    
7030
  def ExpandNames(self):
7031
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7032

    
7033
    self.share_locks = _ShareAll()
7034
    self.needed_locks = {
7035
      locking.LEVEL_NODE: [self.op.node_name],
7036
      }
7037

    
7038
  def BuildHooksEnv(self):
7039
    """Build hooks env.
7040

7041
    This runs on the master, the primary and all the secondaries.
7042

7043
    """
7044
    return {
7045
      "NODE_NAME": self.op.node_name,
7046
      }
7047

    
7048
  def BuildHooksNodes(self):
7049
    """Build hooks nodes.
7050

7051
    """
7052
    nl = [self.cfg.GetMasterNode()]
7053
    return (nl, nl)
7054

    
7055
  def CheckPrereq(self):
7056
    pass
7057

    
7058
  def Exec(self, feedback_fn):
7059
    # Prepare jobs for migration instances
7060
    jobs = [
7061
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7062
                                 mode=self.op.mode,
7063
                                 live=self.op.live,
7064
                                 iallocator=self.op.iallocator,
7065
                                 target_node=self.op.target_node)]
7066
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7067
      ]
7068

    
7069
    # TODO: Run iallocator in this opcode and pass correct placement options to
7070
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7071
    # running the iallocator and the actual migration, a good consistency model
7072
    # will have to be found.
7073

    
7074
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7075
            frozenset([self.op.node_name]))
7076

    
7077
    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
7081
  """Tasklet class for instance migration.
7082

7083
  @type live: boolean
7084
  @ivar live: whether the migration will be done live or non-live;
7085
      this variable is initialized only after CheckPrereq has run
7086
  @type cleanup: boolean
7087
  @ivar cleanup: Whether we are cleaning up after a failed migration
7088
  @type iallocator: string
7089
  @ivar iallocator: The iallocator used to determine target_node
7090
  @type target_node: string
7091
  @ivar target_node: If given, the target_node to reallocate the instance to
7092
  @type failover: boolean
7093
  @ivar failover: Whether operation results in failover or migration
7094
  @type fallback: boolean
7095
  @ivar fallback: Whether fallback to failover is allowed if migration not
7096
                  possible
7097
  @type ignore_consistency: boolean
7098
  @ivar ignore_consistency: Whether we should ignore consistency between
                            source and target node
7100
  @type shutdown_timeout: int
7101
  @ivar shutdown_timeout: In case of failover, timeout used for the shutdown
7102

7103
  """
7104
  def __init__(self, lu, instance_name, cleanup=False,
7105
               failover=False, fallback=False,
7106
               ignore_consistency=False,
7107
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7108
    """Initializes this class.
7109

7110
    """
7111
    Tasklet.__init__(self, lu)
7112

    
7113
    # Parameters
7114
    self.instance_name = instance_name
7115
    self.cleanup = cleanup
7116
    self.live = False # will be overridden later
7117
    self.failover = failover
7118
    self.fallback = fallback
7119
    self.ignore_consistency = ignore_consistency
7120
    self.shutdown_timeout = shutdown_timeout
7121

    
7122
  def CheckPrereq(self):
7123
    """Check prerequisites.
7124

7125
    This checks that the instance is in the cluster.
7126

7127
    """
7128
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7129
    instance = self.cfg.GetInstanceInfo(instance_name)
7130
    assert instance is not None
7131
    self.instance = instance
7132

    
7133
    if (not self.cleanup and not instance.admin_up and not self.failover and
7134
        self.fallback):
7135
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7136
                      " to failover")
7137
      self.failover = True
7138

    
7139
    if instance.disk_template not in constants.DTS_MIRRORED:
7140
      if self.failover:
7141
        text = "failovers"
7142
      else:
7143
        text = "migrations"
7144
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7145
                                 " %s" % (instance.disk_template, text),
7146
                                 errors.ECODE_STATE)
7147

    
7148
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7149
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7150

    
7151
      if self.lu.op.iallocator:
7152
        self._RunAllocator()
7153
      else:
7154
        # We set self.target_node as it is required by
7155
        # BuildHooksEnv
7156
        self.target_node = self.lu.op.target_node
7157

    
7158
      # self.target_node is already populated, either directly or by the
7159
      # iallocator run
7160
      target_node = self.target_node
7161
      if self.target_node == instance.primary_node:
7162
        raise errors.OpPrereqError("Cannot migrate instance %s"
7163
                                   " to its primary (%s)" %
7164
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_INVAL)
7165

    
7166
      if len(self.lu.tasklets) == 1:
7167
        # It is safe to release locks only when we're the only tasklet
7168
        # in the LU
7169
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7170
                      keep=[instance.primary_node, self.target_node])
7171

    
7172
    else:
7173
      secondary_nodes = instance.secondary_nodes
7174
      if not secondary_nodes:
7175
        raise errors.ConfigurationError("No secondary node but using"
7176
                                        " %s disk template" %
7177
                                        instance.disk_template)
7178
      target_node = secondary_nodes[0]
7179
      if self.lu.op.iallocator or (self.lu.op.target_node and
7180
                                   self.lu.op.target_node != target_node):
7181
        if self.failover:
7182
          text = "failed over"
7183
        else:
7184
          text = "migrated"
7185
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7186
                                   " be %s to arbitrary nodes"
7187
                                   " (neither an iallocator nor a target"
7188
                                   " node can be passed)" %
7189
                                   (instance.disk_template, text),
7190
                                   errors.ECODE_INVAL)
7191

    
7192
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7193

    
7194
    # check memory requirements on the secondary node
7195
    if not self.cleanup and (not self.failover or instance.admin_up):
7196
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7197
                           instance.name, i_be[constants.BE_MEMORY],
7198
                           instance.hypervisor)
7199
    else:
7200
      self.lu.LogInfo("Not checking memory on the secondary node as"
7201
                      " instance will not be started")
7202

    
7203
    # check bridge existence
7204
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7205

    
7206
    if not self.cleanup:
7207
      _CheckNodeNotDrained(self.lu, target_node)
7208
      if not self.failover:
7209
        result = self.rpc.call_instance_migratable(instance.primary_node,
7210
                                                   instance)
7211
        if result.fail_msg and self.fallback:
7212
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7213
                          " failover")
7214
          self.failover = True
7215
        else:
7216
          result.Raise("Can't migrate, please use failover",
7217
                       prereq=True, ecode=errors.ECODE_STATE)
7218

    
7219
    assert not (self.failover and self.cleanup)
7220

    
7221
    if not self.failover:
7222
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7223
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7224
                                   " parameters are accepted",
7225
                                   errors.ECODE_INVAL)
7226
      if self.lu.op.live is not None:
7227
        if self.lu.op.live:
7228
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7229
        else:
7230
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7231
        # reset the 'live' parameter to None so that repeated
7232
        # invocations of CheckPrereq do not raise an exception
7233
        self.lu.op.live = None
7234
      elif self.lu.op.mode is None:
7235
        # read the default value from the hypervisor
7236
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7237
                                                skip_globals=False)
7238
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7239

    
7240
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7241
    else:
7242
      # Failover is never live
7243
      self.live = False
7244

    
7245
  def _RunAllocator(self):
7246
    """Run the allocator based on input opcode.
7247

7248
    """
7249
    ial = IAllocator(self.cfg, self.rpc,
7250
                     mode=constants.IALLOCATOR_MODE_RELOC,
7251
                     name=self.instance_name,
7252
                     # TODO See why hail breaks with a single node below
7253
                     relocate_from=[self.instance.primary_node,
7254
                                    self.instance.primary_node],
7255
                     )
7256

    
7257
    ial.Run(self.lu.op.iallocator)
7258

    
7259
    if not ial.success:
7260
      raise errors.OpPrereqError("Can't compute nodes using"
7261
                                 " iallocator '%s': %s" %
7262
                                 (self.lu.op.iallocator, ial.info),
7263
                                 errors.ECODE_NORES)
7264
    if len(ial.result) != ial.required_nodes:
7265
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7266
                                 " of nodes (%s), required %s" %
7267
                                 (self.lu.op.iallocator, len(ial.result),
7268
                                  ial.required_nodes), errors.ECODE_FAULT)
7269
    self.target_node = ial.result[0]
7270
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7271
                 self.instance_name, self.lu.op.iallocator,
7272
                 utils.CommaJoin(ial.result))
7273

    
7274
  def _WaitUntilSync(self):
7275
    """Poll with custom rpc for disk sync.
7276

7277
    This uses our own step-based rpc call.
7278

7279
    """
7280
    self.feedback_fn("* wait until resync is done")
7281
    all_done = False
7282
    while not all_done:
7283
      all_done = True
7284
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7285
                                            self.nodes_ip,
7286
                                            self.instance.disks)
7287
      min_percent = 100
7288
      for node, nres in result.items():
7289
        nres.Raise("Cannot resync disks on node %s" % node)
7290
        node_done, node_percent = nres.payload
7291
        all_done = all_done and node_done
7292
        if node_percent is not None:
7293
          min_percent = min(min_percent, node_percent)
7294
      if not all_done:
7295
        if min_percent < 100:
7296
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7297
        time.sleep(2)
7298

    
7299
  def _EnsureSecondary(self, node):
7300
    """Demote a node to secondary.
7301

7302
    """
7303
    self.feedback_fn("* switching node %s to secondary mode" % node)
7304

    
7305
    for dev in self.instance.disks:
7306
      self.cfg.SetDiskID(dev, node)
7307

    
7308
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7309
                                          self.instance.disks)
7310
    result.Raise("Cannot change disk to secondary on node %s" % node)
7311

    
7312
  def _GoStandalone(self):
7313
    """Disconnect from the network.
7314

7315
    """
7316
    self.feedback_fn("* changing into standalone mode")
7317
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7318
                                               self.instance.disks)
7319
    for node, nres in result.items():
7320
      nres.Raise("Cannot disconnect disks node %s" % node)
7321

    
7322
  def _GoReconnect(self, multimaster):
7323
    """Reconnect to the network.
7324

7325
    """
7326
    if multimaster:
7327
      msg = "dual-master"
7328
    else:
7329
      msg = "single-master"
7330
    self.feedback_fn("* changing disks into %s mode" % msg)
7331
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7332
                                           self.instance.disks,
7333
                                           self.instance.name, multimaster)
7334
    for node, nres in result.items():
7335
      nres.Raise("Cannot change disks config on node %s" % node)
7336

    
7337
  def _ExecCleanup(self):
7338
    """Try to cleanup after a failed migration.
7339

7340
    The cleanup is done by:
7341
      - check that the instance is running only on one node
7342
        (and update the config if needed)
7343
      - change disks on its secondary node to secondary
7344
      - wait until disks are fully synchronized
7345
      - disconnect from the network
7346
      - change disks into single-master mode
7347
      - wait again until disks are fully synchronized
7348

7349
    """
7350
    instance = self.instance
7351
    target_node = self.target_node
7352
    source_node = self.source_node
7353

    
7354
    # check running on only one node
7355
    self.feedback_fn("* checking where the instance actually runs"
7356
                     " (if this hangs, the hypervisor might be in"
7357
                     " a bad state)")
7358
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7359
    for node, result in ins_l.items():
7360
      result.Raise("Can't contact node %s" % node)
7361

    
7362
    runningon_source = instance.name in ins_l[source_node].payload
7363
    runningon_target = instance.name in ins_l[target_node].payload
7364

    
7365
    if runningon_source and runningon_target:
7366
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7367
                               " or the hypervisor is confused; you will have"
7368
                               " to ensure manually that it runs only on one"
7369
                               " and restart this operation")
7370

    
7371
    if not (runningon_source or runningon_target):
7372
      raise errors.OpExecError("Instance does not seem to be running at all;"
7373
                               " in this case it's safer to repair by"
7374
                               " running 'gnt-instance stop' to ensure disk"
7375
                               " shutdown, and then restarting it")
7376

    
7377
    if runningon_target:
7378
      # the migration has actually succeeded, we need to update the config
7379
      self.feedback_fn("* instance running on secondary node (%s),"
7380
                       " updating config" % target_node)
7381
      instance.primary_node = target_node
7382
      self.cfg.Update(instance, self.feedback_fn)
7383
      demoted_node = source_node
7384
    else:
7385
      self.feedback_fn("* instance confirmed to be running on its"
7386
                       " primary node (%s)" % source_node)
7387
      demoted_node = target_node
7388

    
7389
    if instance.disk_template in constants.DTS_INT_MIRROR:
7390
      self._EnsureSecondary(demoted_node)
7391
      try:
7392
        self._WaitUntilSync()
7393
      except errors.OpExecError:
7394
        # we ignore here errors, since if the device is standalone, it
7395
        # won't be able to sync
7396
        pass
7397
      self._GoStandalone()
7398
      self._GoReconnect(False)
7399
      self._WaitUntilSync()
7400

    
7401
    self.feedback_fn("* done")
7402

    
7403
  def _RevertDiskStatus(self):
7404
    """Try to revert the disk status after a failed migration.
7405

7406
    """
7407
    target_node = self.target_node
7408
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7409
      return
7410

    
7411
    try:
7412
      self._EnsureSecondary(target_node)
7413
      self._GoStandalone()
7414
      self._GoReconnect(False)
7415
      self._WaitUntilSync()
7416
    except errors.OpExecError, err:
7417
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7418
                         " please try to recover the instance manually;"
7419
                         " error '%s'" % str(err))
7420

    
7421
  def _AbortMigration(self):
7422
    """Call the hypervisor code to abort a started migration.
7423

7424
    """
7425
    instance = self.instance
7426
    target_node = self.target_node
7427
    migration_info = self.migration_info
7428

    
7429
    abort_result = self.rpc.call_finalize_migration(target_node,
7430
                                                    instance,
7431
                                                    migration_info,
7432
                                                    False)
7433
    abort_msg = abort_result.fail_msg
7434
    if abort_msg:
7435
      logging.error("Aborting migration failed on target node %s: %s",
7436
                    target_node, abort_msg)
7437
      # Don't raise an exception here, as we still have to try to revert the
7438
      # disk status, even if this step failed.
7439

    
7440
  def _ExecMigration(self):
7441
    """Migrate an instance.
7442

7443
    The migrate is done by:
7444
      - change the disks into dual-master mode
7445
      - wait until disks are fully synchronized again
7446
      - migrate the instance
7447
      - change disks on the new secondary node (the old primary) to secondary
7448
      - wait until disks are fully synchronized
7449
      - change disks into single-master mode
7450

7451
    """
7452
    instance = self.instance
7453
    target_node = self.target_node
7454
    source_node = self.source_node
7455

    
7456
    # Check for hypervisor version mismatch and warn the user.
7457
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7458
                                       None, self.instance.hypervisor)
7459
    src_info = nodeinfo[source_node]
7460
    dst_info = nodeinfo[target_node]
7461

    
7462
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7463
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7464
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7465
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7466
      if src_version != dst_version:
7467
        self.feedback_fn("* warning: hypervisor version mismatch between"
7468
                         " source (%s) and target (%s) node" %
7469
                         (src_version, dst_version))
7470

    
7471
    self.feedback_fn("* checking disk consistency between source and target")
7472
    for dev in instance.disks:
7473
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7474
        raise errors.OpExecError("Disk %s is degraded or not fully"
7475
                                 " synchronized on target node,"
7476
                                 " aborting migration" % dev.iv_name)
7477

    
7478
    # First get the migration information from the remote node
7479
    result = self.rpc.call_migration_info(source_node, instance)
7480
    msg = result.fail_msg
7481
    if msg:
7482
      log_err = ("Failed fetching source migration information from %s: %s" %
7483
                 (source_node, msg))
7484
      logging.error(log_err)
7485
      raise errors.OpExecError(log_err)
7486

    
7487
    self.migration_info = migration_info = result.payload
7488

    
7489
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7490
      # Then switch the disks to master/master mode
7491
      self._EnsureSecondary(target_node)
7492
      self._GoStandalone()
7493
      self._GoReconnect(True)
7494
      self._WaitUntilSync()
7495

    
7496
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7497
    result = self.rpc.call_accept_instance(target_node,
7498
                                           instance,
7499
                                           migration_info,
7500
                                           self.nodes_ip[target_node])
7501

    
7502
    msg = result.fail_msg
7503
    if msg:
7504
      logging.error("Instance pre-migration failed, trying to revert"
7505
                    " disk status: %s", msg)
7506
      self.feedback_fn("Pre-migration failed, aborting")
7507
      self._AbortMigration()
7508
      self._RevertDiskStatus()
7509
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7510
                               (instance.name, msg))
7511

    
7512
    self.feedback_fn("* migrating instance to %s" % target_node)
7513
    result = self.rpc.call_instance_migrate(source_node, instance,
7514
                                            self.nodes_ip[target_node],
7515
                                            self.live)
7516
    msg = result.fail_msg
7517
    if msg:
7518
      logging.error("Instance migration failed, trying to revert"
7519
                    " disk status: %s", msg)
7520
      self.feedback_fn("Migration failed, aborting")
7521
      self._AbortMigration()
7522
      self._RevertDiskStatus()
7523
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7524
                               (instance.name, msg))
7525

    
7526
    instance.primary_node = target_node
7527
    # distribute new instance config to the other nodes
7528
    self.cfg.Update(instance, self.feedback_fn)
7529

    
7530
    result = self.rpc.call_finalize_migration(target_node,
7531
                                              instance,
7532
                                              migration_info,
7533
                                              True)
7534
    msg = result.fail_msg
7535
    if msg:
7536
      logging.error("Instance migration succeeded, but finalization failed:"
7537
                    " %s", msg)
7538
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7539
                               msg)
7540

    
7541
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7542
      self._EnsureSecondary(source_node)
7543
      self._WaitUntilSync()
7544
      self._GoStandalone()
7545
      self._GoReconnect(False)
7546
      self._WaitUntilSync()
7547

    
7548
    self.feedback_fn("* done")
7549

    
7550
  def _ExecFailover(self):
7551
    """Failover an instance.
7552

7553
    The failover is done by shutting it down on its present node and
7554
    starting it on the secondary.
7555

7556
    """
7557
    instance = self.instance
7558
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7559

    
7560
    source_node = instance.primary_node
7561
    target_node = self.target_node
7562

    
7563
    if instance.admin_up:
7564
      self.feedback_fn("* checking disk consistency between source and target")
7565
      for dev in instance.disks:
7566
        # for drbd, these are drbd over lvm
7567
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7568
          if primary_node.offline:
7569
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7570
                             " target node %s" %
7571
                             (primary_node.name, dev.iv_name, target_node))
7572
          elif not self.ignore_consistency:
7573
            raise errors.OpExecError("Disk %s is degraded on target node,"
7574
                                     " aborting failover" % dev.iv_name)
7575
    else:
7576
      self.feedback_fn("* not checking disk consistency as instance is not"
7577
                       " running")
7578

    
7579
    self.feedback_fn("* shutting down instance on source node")
7580
    logging.info("Shutting down instance %s on node %s",
7581
                 instance.name, source_node)
7582

    
7583
    result = self.rpc.call_instance_shutdown(source_node, instance,
7584
                                             self.shutdown_timeout)
7585
    msg = result.fail_msg
7586
    if msg:
7587
      if self.ignore_consistency or primary_node.offline:
7588
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7589
                           " proceeding anyway; please make sure node"
7590
                           " %s is down; error details: %s",
7591
                           instance.name, source_node, source_node, msg)
7592
      else:
7593
        raise errors.OpExecError("Could not shutdown instance %s on"
7594
                                 " node %s: %s" %
7595
                                 (instance.name, source_node, msg))
7596

    
7597
    self.feedback_fn("* deactivating the instance's disks on source node")
7598
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7599
      raise errors.OpExecError("Can't shut down the instance's disks")
7600

    
7601
    instance.primary_node = target_node
7602
    # distribute new instance config to the other nodes
7603
    self.cfg.Update(instance, self.feedback_fn)
7604

    
7605
    # Only start the instance if it's marked as up
7606
    if instance.admin_up:
7607
      self.feedback_fn("* activating the instance's disks on target node %s" %
7608
                       target_node)
7609
      logging.info("Starting instance %s on node %s",
7610
                   instance.name, target_node)
7611

    
7612
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7613
                                           ignore_secondaries=True)
7614
      if not disks_ok:
7615
        _ShutdownInstanceDisks(self.lu, instance)
7616
        raise errors.OpExecError("Can't activate the instance's disks")
7617

    
7618
      self.feedback_fn("* starting the instance on the target node %s" %
7619
                       target_node)
7620
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7621
                                            False)
7622
      msg = result.fail_msg
7623
      if msg:
7624
        _ShutdownInstanceDisks(self.lu, instance)
7625
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7626
                                 (instance.name, target_node, msg))
7627

    
7628
  def Exec(self, feedback_fn):
7629
    """Perform the migration.
7630

7631
    """
7632
    self.feedback_fn = feedback_fn
7633
    self.source_node = self.instance.primary_node
7634

    
7635
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7636
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7637
      self.target_node = self.instance.secondary_nodes[0]
7638
      # Otherwise self.target_node has been populated either
7639
      # directly, or through an iallocator.
7640

    
7641
    self.all_nodes = [self.source_node, self.target_node]
7642
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7643
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7644

    
7645
    if self.failover:
7646
      feedback_fn("Failover instance %s" % self.instance.name)
7647
      self._ExecFailover()
7648
    else:
7649
      feedback_fn("Migrating instance %s" % self.instance.name)
7650

    
7651
      if self.cleanup:
7652
        return self._ExecCleanup()
7653
      else:
7654
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
7658
                    info, force_open):
7659
  """Create a tree of block devices on a given node.
7660

7661
  If this device type has to be created on secondaries, create it and
7662
  all its children.
7663

7664
  If not, just recurse to children keeping the same 'force' value.
7665

7666
  @param lu: the lu on whose behalf we execute
7667
  @param node: the node on which to create the device
7668
  @type instance: L{objects.Instance}
7669
  @param instance: the instance which owns the device
7670
  @type device: L{objects.Disk}
7671
  @param device: the device to create
7672
  @type force_create: boolean
7673
  @param force_create: whether to force creation of this device; this
7674
      will be changed to True whenever we find a device which has
7675
      CreateOnSecondary() attribute
7676
  @param info: the extra 'metadata' we should attach to the device
7677
      (this will be represented as a LVM tag)
7678
  @type force_open: boolean
7679
  @param force_open: this parameter will be passed to the
7680
      L{backend.BlockdevCreate} function where it specifies
7681
      whether we run on primary or not, and it affects both
7682
      the child assembly and the device own Open() execution
7683

7684
  """
7685
  if device.CreateOnSecondary():
7686
    force_create = True
7687

    
7688
  if device.children:
7689
    for child in device.children:
7690
      _CreateBlockDev(lu, node, instance, child, force_create,
7691
                      info, force_open)
7692

    
7693
  if not force_create:
7694
    return
7695

    
7696
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
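  # Note: children are handled before their parent above, so for a mirrored
  # disk (e.g. DRBD8) the leaf LVs exist on the node before the parent
  # device is created; the parent itself is only created once force_create
  # is true for it.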


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7700
  """Create a single block device on a given node.
7701

7702
  This will not recurse over children of the device, so they must be
7703
  created in advance.
7704

7705
  @param lu: the lu on whose behalf we execute
7706
  @param node: the node on which to create the device
7707
  @type instance: L{objects.Instance}
7708
  @param instance: the instance which owns the device
7709
  @type device: L{objects.Disk}
7710
  @param device: the device to create
7711
  @param info: the extra 'metadata' we should attach to the device
7712
      (this will be represented as a LVM tag)
7713
  @type force_open: boolean
7714
  @param force_open: this parameter will be passed to the
7715
      L{backend.BlockdevCreate} function where it specifies
7716
      whether we run on primary or not, and it affects both
7717
      the child assembly and the device own Open() execution
7718

7719
  """
7720
  lu.cfg.SetDiskID(device, node)
7721
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7722
                                       instance.name, force_open, info)
7723
  result.Raise("Can't create block device %s on"
7724
               " node %s for instance %s" % (device, node, instance.name))
7725
  if device.physical_id is None:
7726
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate unique logical volume names, one for each of the
  given extensions.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
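  # Illustrative example (IDs are hypothetical): _GenerateUniqueNames(lu,
  # [".disk0_data", ".disk0_meta"]) returns one name per extension, e.g.
  # ["<uuid1>.disk0_data", "<uuid2>.disk0_meta"].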


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7743
                         iv_name, p_minor, s_minor):
7744
  """Generate a drbd8 device complete with its children.
7745

7746
  """
7747
  assert len(vgnames) == len(names) == 2
7748
  port = lu.cfg.AllocatePort()
7749
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7750
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7751
                          logical_id=(vgnames[0], names[0]))
7752
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7753
                          logical_id=(vgnames[1], names[1]))
7754
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7755
                          logical_id=(primary, secondary, port,
7756
                                      p_minor, s_minor,
7757
                                      shared_secret),
7758
                          children=[dev_data, dev_meta],
7759
                          iv_name=iv_name)
7760
  return drbd_dev
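  # Resulting layout (sketch): an LD_DRBD8 disk of the requested size whose
  # logical_id is (primary, secondary, port, p_minor, s_minor, secret) and
  # whose children are the data LV (size MB) and a 128 MB metadata LV.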


def _GenerateDiskTemplate(lu, template_name,
7764
                          instance_name, primary_node,
7765
                          secondary_nodes, disk_info,
7766
                          file_storage_dir, file_driver,
7767
                          base_index, feedback_fn):
7768
  """Generate the entire disk layout for a given template type.
7769

7770
  """
7771
  #TODO: compute space requirements
7772

    
7773
  vgname = lu.cfg.GetVGName()
7774
  disk_count = len(disk_info)
7775
  disks = []
7776
  if template_name == constants.DT_DISKLESS:
7777
    pass
7778
  elif template_name == constants.DT_PLAIN:
7779
    if len(secondary_nodes) != 0:
7780
      raise errors.ProgrammerError("Wrong template configuration")
7781

    
7782
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7783
                                      for i in range(disk_count)])
7784
    for idx, disk in enumerate(disk_info):
7785
      disk_index = idx + base_index
7786
      vg = disk.get(constants.IDISK_VG, vgname)
7787
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7788
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7789
                              size=disk[constants.IDISK_SIZE],
7790
                              logical_id=(vg, names[idx]),
7791
                              iv_name="disk/%d" % disk_index,
7792
                              mode=disk[constants.IDISK_MODE])
7793
      disks.append(disk_dev)
7794
  elif template_name == constants.DT_DRBD8:
7795
    if len(secondary_nodes) != 1:
7796
      raise errors.ProgrammerError("Wrong template configuration")
7797
    remote_node = secondary_nodes[0]
7798
    minors = lu.cfg.AllocateDRBDMinor(
7799
      [primary_node, remote_node] * len(disk_info), instance_name)
7800

    
7801
    names = []
7802
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7803
                                               for i in range(disk_count)]):
7804
      names.append(lv_prefix + "_data")
7805
      names.append(lv_prefix + "_meta")
7806
    for idx, disk in enumerate(disk_info):
7807
      disk_index = idx + base_index
7808
      data_vg = disk.get(constants.IDISK_VG, vgname)
7809
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7810
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7811
                                      disk[constants.IDISK_SIZE],
7812
                                      [data_vg, meta_vg],
7813
                                      names[idx * 2:idx * 2 + 2],
7814
                                      "disk/%d" % disk_index,
7815
                                      minors[idx * 2], minors[idx * 2 + 1])
7816
      disk_dev.mode = disk[constants.IDISK_MODE]
7817
      disks.append(disk_dev)
7818
  elif template_name == constants.DT_FILE:
7819
    if len(secondary_nodes) != 0:
7820
      raise errors.ProgrammerError("Wrong template configuration")
7821

    
7822
    opcodes.RequireFileStorage()
7823

    
7824
    for idx, disk in enumerate(disk_info):
7825
      disk_index = idx + base_index
7826
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7827
                              size=disk[constants.IDISK_SIZE],
7828
                              iv_name="disk/%d" % disk_index,
7829
                              logical_id=(file_driver,
7830
                                          "%s/disk%d" % (file_storage_dir,
7831
                                                         disk_index)),
7832
                              mode=disk[constants.IDISK_MODE])
7833
      disks.append(disk_dev)
7834
  elif template_name == constants.DT_SHARED_FILE:
7835
    if len(secondary_nodes) != 0:
7836
      raise errors.ProgrammerError("Wrong template configuration")
7837

    
7838
    opcodes.RequireSharedFileStorage()
7839

    
7840
    for idx, disk in enumerate(disk_info):
7841
      disk_index = idx + base_index
7842
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7843
                              size=disk[constants.IDISK_SIZE],
7844
                              iv_name="disk/%d" % disk_index,
7845
                              logical_id=(file_driver,
7846
                                          "%s/disk%d" % (file_storage_dir,
7847
                                                         disk_index)),
7848
                              mode=disk[constants.IDISK_MODE])
7849
      disks.append(disk_dev)
7850
  elif template_name == constants.DT_BLOCK:
7851
    if len(secondary_nodes) != 0:
7852
      raise errors.ProgrammerError("Wrong template configuration")
7853

    
7854
    for idx, disk in enumerate(disk_info):
7855
      disk_index = idx + base_index
7856
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7857
                              size=disk[constants.IDISK_SIZE],
7858
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7859
                                          disk[constants.IDISK_ADOPT]),
7860
                              iv_name="disk/%d" % disk_index,
7861
                              mode=disk[constants.IDISK_MODE])
7862
      disks.append(disk_dev)
7863

    
7864
  else:
7865
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7866
  return disks
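  # Illustrative: for DT_DRBD8 with a single 1024 MB disk this returns one
  # DRBD8 objects.Disk (iv_name "disk/0") whose children are the 1024 MB
  # data LV and the 128 MB metadata LV built by _GenerateDRBD8Branch.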


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far, in seconds
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
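  # Worked example: _CalcEta(30.0, 512, 2048) == 90.0, i.e. having written
  # 512 units in 30 seconds, the remaining 1536 units are expected to take
  # about 90 more seconds at the same average rate.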


def _WipeDisks(lu, instance):
7890
  """Wipes instance disks.
7891

7892
  @type lu: L{LogicalUnit}
7893
  @param lu: the logical unit on whose behalf we execute
7894
  @type instance: L{objects.Instance}
7895
  @param instance: the instance whose disks we should create
7896
  @return: the success of the wipe
7897

7898
  """
7899
  node = instance.primary_node
7900

    
7901
  for device in instance.disks:
7902
    lu.cfg.SetDiskID(device, node)
7903

    
7904
  logging.info("Pause sync of instance %s disks", instance.name)
7905
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7906

    
7907
  for idx, success in enumerate(result.payload):
7908
    if not success:
7909
      logging.warn("pause-sync of instance %s for disks %d failed",
7910
                   instance.name, idx)
7911

    
7912
  try:
7913
    for idx, device in enumerate(instance.disks):
7914
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7915
      # MAX_WIPE_CHUNK at max
7916
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7917
                            constants.MIN_WIPE_CHUNK_PERCENT)
7918
      # we _must_ make this an int, otherwise rounding errors will
7919
      # occur
7920
      wipe_chunk_size = int(wipe_chunk_size)
7921

    
7922
      lu.LogInfo("* Wiping disk %d", idx)
7923
      logging.info("Wiping disk %d for instance %s, node %s using"
7924
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7925

    
7926
      offset = 0
7927
      size = device.size
7928
      last_output = 0
7929
      start_time = time.time()
7930

    
7931
      while offset < size:
7932
        wipe_size = min(wipe_chunk_size, size - offset)
7933
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7934
                      idx, offset, wipe_size)
7935
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7936
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7937
                     (idx, offset, wipe_size))
7938
        now = time.time()
7939
        offset += wipe_size
7940
        if now - last_output >= 60:
7941
          eta = _CalcEta(now - start_time, offset, size)
7942
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7943
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7944
          last_output = now
7945
  finally:
7946
    logging.info("Resume sync of instance %s disks", instance.name)
7947

    
7948
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7949

    
7950
    for idx, success in enumerate(result.payload):
7951
      if not success:
7952
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7953
                      " look at the status and troubleshoot the issue", idx)
7954
        logging.warn("resume-sync of instance %s for disks %d failed",
7955
                     instance.name, idx)


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7959
  """Create all disks for an instance.
7960

7961
  This abstracts away some work from AddInstance.
7962

7963
  @type lu: L{LogicalUnit}
7964
  @param lu: the logical unit on whose behalf we execute
7965
  @type instance: L{objects.Instance}
7966
  @param instance: the instance whose disks we should create
7967
  @type to_skip: list
7968
  @param to_skip: list of indices to skip
7969
  @type target_node: string
7970
  @param target_node: if passed, overrides the target node for creation
7971
  @rtype: boolean
7972
  @return: the success of the creation
7973

7974
  """
7975
  info = _GetInstanceInfoText(instance)
7976
  if target_node is None:
7977
    pnode = instance.primary_node
7978
    all_nodes = instance.all_nodes
7979
  else:
7980
    pnode = target_node
7981
    all_nodes = [pnode]
7982

    
7983
  if instance.disk_template in constants.DTS_FILEBASED:
7984
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7985
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7986

    
7987
    result.Raise("Failed to create directory '%s' on"
7988
                 " node %s" % (file_storage_dir, pnode))
7989

    
7990
  # Note: this needs to be kept in sync with adding of disks in
7991
  # LUInstanceSetParams
7992
  for idx, device in enumerate(instance.disks):
7993
    if to_skip and idx in to_skip:
7994
      continue
7995
    logging.info("Creating volume %s for instance %s",
7996
                 device.iv_name, instance.name)
7997
    #HARDCODE
7998
    for node in all_nodes:
7999
      f_create = node == pnode
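      # f_create doubles as both force_create and force_open below, so
      # devices are fully created and opened only on the primary (or the
      # explicitly given target) node; secondaries only get the components
      # that report CreateOnSecondary().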
8000
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
8004
  """Remove all disks for an instance.
8005

8006
  This abstracts away some work from `AddInstance()` and
8007
  `RemoveInstance()`. Note that in case some of the devices couldn't
8008
  be removed, the removal will continue with the other ones (compare
8009
  with `_CreateDisks()`).
8010

8011
  @type lu: L{LogicalUnit}
8012
  @param lu: the logical unit on whose behalf we execute
8013
  @type instance: L{objects.Instance}
8014
  @param instance: the instance whose disks we should remove
8015
  @type target_node: string
8016
  @param target_node: used to override the node on which to remove the disks
8017
  @rtype: boolean
8018
  @return: the success of the removal
8019

8020
  """
8021
  logging.info("Removing block devices for instance %s", instance.name)
8022

    
8023
  all_result = True
8024
  for device in instance.disks:
8025
    if target_node:
8026
      edata = [(target_node, device)]
8027
    else:
8028
      edata = device.ComputeNodeTree(instance.primary_node)
8029
    for node, disk in edata:
8030
      lu.cfg.SetDiskID(disk, node)
8031
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8032
      if msg:
8033
        lu.LogWarning("Could not remove block device %s on node %s,"
8034
                      " continuing anyway: %s", device.iv_name, node, msg)
8035
        all_result = False
8036

    
8037
    # if this is a DRBD disk, return its port to the pool
8038
    if device.dev_type in constants.LDS_DRBD:
8039
      tcp_port = device.logical_id[2]
8040
      lu.cfg.AddTcpUdpPort(tcp_port)
8041

    
8042
  if instance.disk_template == constants.DT_FILE:
8043
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8044
    if target_node:
8045
      tgt = target_node
8046
    else:
8047
      tgt = instance.primary_node
8048
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8049
    if result.fail_msg:
8050
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8051
                    file_storage_dir, tgt, result.fail_msg)
8052
      all_result = False
8053

    
8054
  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
8058
  """Compute disk size requirements in the volume group
8059

8060
  """
8061
  def _compute(disks, payload):
8062
    """Universal algorithm.
8063

8064
    """
8065
    vgs = {}
8066
    for disk in disks:
8067
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8069

    
8070
    return vgs
8071

    
8072
  # Required free disk space as a function of disk and swap space
8073
  req_size_dict = {
8074
    constants.DT_DISKLESS: {},
8075
    constants.DT_PLAIN: _compute(disks, 0),
8076
    # 128 MB are added for drbd metadata for each disk
8077
    constants.DT_DRBD8: _compute(disks, 128),
8078
    constants.DT_FILE: {},
8079
    constants.DT_SHARED_FILE: {},
8080
  }
8081

    
8082
  if disk_template not in req_size_dict:
8083
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8084
                                 " is unknown" % disk_template)
8085

    
8086
  return req_size_dict[disk_template]
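  # Illustrative (hypothetical VG name): two 1024 MB DT_DRBD8 disks in
  # volume group "xenvg" yield {"xenvg": 2304}, i.e. 2 * (1024 + 128) MB of
  # free space required in that VG.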


def _ComputeDiskSize(disk_template, disks):
8090
  """Compute disk size requirements in the volume group
8091

8092
  """
8093
  # Required free disk space as a function of disk and swap space
8094
  req_size_dict = {
8095
    constants.DT_DISKLESS: None,
8096
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8097
    # 128 MB are added for drbd metadata for each disk
8098
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
8099
    constants.DT_FILE: None,
8100
    constants.DT_SHARED_FILE: 0,
8101
    constants.DT_BLOCK: 0,
8102
  }
8103

    
8104
  if disk_template not in req_size_dict:
8105
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8106
                                 " is unknown" % disk_template)
8107

    
8108
  return req_size_dict[disk_template]
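  # Illustrative: for two 1024 MB disks this yields 2048 for DT_PLAIN and
  # 2304 for DT_DRBD8 (128 MB of DRBD metadata per disk); DT_DISKLESS and
  # DT_FILE have no volume group requirement (None).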


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
8127
  """Hypervisor parameter validation.
8128

8129
  This function abstract the hypervisor parameter validation to be
8130
  used in both instance create and instance modify.
8131

8132
  @type lu: L{LogicalUnit}
8133
  @param lu: the logical unit for which we check
8134
  @type nodenames: list
8135
  @param nodenames: the list of nodes on which we should check
8136
  @type hvname: string
8137
  @param hvname: the name of the hypervisor we should use
8138
  @type hvparams: dict
8139
  @param hvparams: the parameters which we need to check
8140
  @raise errors.OpPrereqError: if the parameters are not valid
8141

8142
  """
8143
  nodenames = _FilterVmNodes(lu, nodenames)
8144
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8145
                                                  hvname,
8146
                                                  hvparams)
8147
  for node in nodenames:
8148
    info = hvinfo[node]
8149
    if info.offline:
8150
      continue
8151
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
8155
  """OS parameters validation.
8156

8157
  @type lu: L{LogicalUnit}
8158
  @param lu: the logical unit for which we check
8159
  @type required: boolean
8160
  @param required: whether the validation should fail if the OS is not
8161
      found
8162
  @type nodenames: list
8163
  @param nodenames: the list of nodes on which we should check
8164
  @type osname: string
8165
  @param osname: the name of the OS we should use
8166
  @type osparams: dict
8167
  @param osparams: the parameters which we need to check
8168
  @raise errors.OpPrereqError: if the parameters are not valid
8169

8170
  """
8171
  nodenames = _FilterVmNodes(lu, nodenames)
8172
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8173
                                   [constants.OS_VALIDATE_PARAMETERS],
8174
                                   osparams)
8175
  for node, nres in result.items():
8176
    # we don't check for offline cases since this should be run only
8177
    # against the master node and/or an instance's nodes
8178
    nres.Raise("OS Parameters validation failed on node %s" % node)
8179
    if not nres.payload:
8180
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8181
                 osname, node)
8182

    
8183

    
8184
class LUInstanceCreate(LogicalUnit):
8185
  """Create an instance.
8186

8187
  """
8188
  HPATH = "instance-add"
8189
  HTYPE = constants.HTYPE_INSTANCE
8190
  REQ_BGL = False
8191

    
8192
  def CheckArguments(self):
8193
    """Check arguments.
8194

8195
    """
8196
    # do not require name_check to ease forward/backward compatibility
8197
    # for tools
8198
    if self.op.no_install and self.op.start:
8199
      self.LogInfo("No-installation mode selected, disabling startup")
8200
      self.op.start = False
8201
    # validate/normalize the instance name
8202
    self.op.instance_name = \
8203
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8204

    
8205
    if self.op.ip_check and not self.op.name_check:
8206
      # TODO: make the ip check more flexible and not depend on the name check
8207
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8208
                                 " check", errors.ECODE_INVAL)
8209

    
8210
    # check nics' parameter names
8211
    for nic in self.op.nics:
8212
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8213

    
8214
    # check disks. parameter names and consistent adopt/no-adopt strategy
8215
    has_adopt = has_no_adopt = False
8216
    for disk in self.op.disks:
8217
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8218
      if constants.IDISK_ADOPT in disk:
8219
        has_adopt = True
8220
      else:
8221
        has_no_adopt = True
8222
    if has_adopt and has_no_adopt:
8223
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8224
                                 errors.ECODE_INVAL)
8225
    if has_adopt:
8226
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8227
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8228
                                   " '%s' disk template" %
8229
                                   self.op.disk_template,
8230
                                   errors.ECODE_INVAL)
8231
      if self.op.iallocator is not None:
8232
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8233
                                   " iallocator script", errors.ECODE_INVAL)
8234
      if self.op.mode == constants.INSTANCE_IMPORT:
8235
        raise errors.OpPrereqError("Disk adoption not allowed for"
8236
                                   " instance import", errors.ECODE_INVAL)
8237
    else:
8238
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8239
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8240
                                   " but no 'adopt' parameter given" %
8241
                                   self.op.disk_template,
8242
                                   errors.ECODE_INVAL)
8243

    
8244
    self.adopt_disks = has_adopt
8245

    
8246
    # instance name verification
8247
    if self.op.name_check:
8248
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

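  # Illustrative sketch (not part of the original module): the shape of the
  # iallocator answer consumed by _RunAllocator() above. The node names are
  # made up; only the list structure and the pnode/snode assignment mirror
  # the code.
  #
  #   ial.success        -> True
  #   ial.required_nodes -> 2            (e.g. for a mirrored disk template)
  #   ial.result         -> ["node1.example.com", "node2.example.com"]
  #   self.op.pnode      -> ial.result[0]
  #   self.op.snode      -> ial.result[1]
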
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

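  # Illustrative sketch (not part of the original module): a minimal export
  # config fragment of the kind _ReadExportInfo()/_ReadExportParams() parse.
  # Section headers are shown symbolically via their constants; the literal
  # section names and all values below are made-up examples.
  #
  #   [<INISECT_EXP>]
  #   version = <EXPORT_VERSION>
  #   os = debootstrap
  #
  #   [<INISECT_INS>]
  #   name = instance1.example.com
  #   disk_template = plain
  #   disk_count = 1
  #   disk0_size = 10240
  #   nic_count = 1
  #   nic0_mac = aa:00:00:11:22:33
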
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

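  # Illustrative sketch (not part of the original module): with a cluster file
  # storage dir of "/srv/ganeti/file-storage" (made-up value), an opcode
  # file_storage_dir of "web" and an instance name of "instance1.example.com",
  # the join above would yield:
  #
  #   /srv/ganeti/file-storage/web/instance1.example.com
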
  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      disk_images = []
      for idx in range(export_disks):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      try:
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
                                   " an integer: %s" % str(err),
                                   errors.ECODE_STATE)
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

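  # Illustrative sketch (not part of the original module): the per-disk dicts
  # built in CheckPrereq() above. The keys are the constants.IDISK_* names;
  # the values shown are made-up examples.
  #
  #   self.disks == [{
  #     constants.IDISK_SIZE: 10240,
  #     constants.IDISK_MODE: constants.DISK_RDWR,
  #     constants.IDISK_VG: "xenvg",
  #     constants.IDISK_METAVG: "xenvg",
  #     # present only when adopting an existing volume:
  #     # constants.IDISK_ADOPT: "existing-lv-name",
  #   }]
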
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


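# Illustrative sketch (not part of the original module): how an LU such as
# LUInstanceConsole consumes the helper above. The instance name is made up;
# the returned dict is the serialized console object.
#
#   cluster = self.cfg.GetClusterInfo()
#   instance = self.cfg.GetInstanceInfo("instance1.example.com")
#   console = _GetInstanceConsole(cluster, instance)
#   # 'console' now describes how to connect to the instance console, which
#   # the client turns into the command line to run on the master node.
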
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

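  # Illustrative sketch (not part of the original module): the parameter
  # combinations accepted by CheckArguments() above. The mode names refer to
  # the constants.REPLACE_DISK_* values used in the code.
  #
  #   mode                remote_node   iallocator   accepted?
  #   REPLACE_DISK_CHG    given         None         yes
  #   REPLACE_DISK_CHG    None          given        yes
  #   REPLACE_DISK_CHG    None          None         no (need one of the two)
  #   REPLACE_DISK_CHG    given         given        no (not both)
  #   any other mode      None          None         yes
  #   any other mode      given         given        no (only valid for CHG)
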
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)
9667

    
9668
    # Activate the instance disks if we're replacing them on a down instance
9669
    if activate_disks:
9670
      _StartInstanceDisks(self.lu, self.instance, True)
9671

    
9672
    try:
9673
      # Should we replace the secondary node?
9674
      if self.new_node is not None:
9675
        fn = self._ExecDrbd8Secondary
9676
      else:
9677
        fn = self._ExecDrbd8DiskOnly
9678

    
9679
      result = fn(feedback_fn)
9680
    finally:
9681
      # Deactivate the instance disks if we're replacing them on a
9682
      # down instance
9683
      if activate_disks:
9684
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9685

    
9686
    if __debug__:
9687
      # Verify owned locks
9688
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9689
      nodes = frozenset(self.node_secondary_ip)
9690
      assert ((self.early_release and not owned_nodes) or
9691
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9692
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9693
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9694

    
9695
    return result
9696

    
9697
  def _CheckVolumeGroup(self, nodes):
9698
    self.lu.LogInfo("Checking volume groups")
9699

    
9700
    vgname = self.cfg.GetVGName()
9701

    
9702
    # Make sure volume group exists on all involved nodes
9703
    results = self.rpc.call_vg_list(nodes)
9704
    if not results:
9705
      raise errors.OpExecError("Can't list volume groups on the nodes")
9706

    
9707
    for node in nodes:
9708
      res = results[node]
9709
      res.Raise("Error checking node %s" % node)
9710
      if vgname not in res.payload:
9711
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9712
                                 (vgname, node))
9713

    
9714
  def _CheckDisksExistence(self, nodes):
9715
    # Check disk existence
9716
    for idx, dev in enumerate(self.instance.disks):
9717
      if idx not in self.disks:
9718
        continue
9719

    
9720
      for node in nodes:
9721
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9722
        self.cfg.SetDiskID(dev, node)
9723

    
9724
        result = self.rpc.call_blockdev_find(node, dev)
9725

    
9726
        msg = result.fail_msg
9727
        if msg or not result.payload:
9728
          if not msg:
9729
            msg = "disk not found"
9730
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9731
                                   (idx, node, msg))
9732

    
9733
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9734
    for idx, dev in enumerate(self.instance.disks):
9735
      if idx not in self.disks:
9736
        continue
9737

    
9738
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9739
                      (idx, node_name))
9740

    
9741
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9742
                                   ldisk=ldisk):
9743
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9744
                                 " replace disks for instance %s" %
9745
                                 (node_name, self.instance.name))
9746

    
9747
  def _CreateNewStorage(self, node_name):
9748
    """Create new storage on the primary or secondary node.
9749

9750
    This is only used for same-node replaces, not for changing the
9751
    secondary node, hence we don't want to modify the existing disk.
9752

9753
    """
9754
    iv_names = {}
9755

    
9756
    for idx, dev in enumerate(self.instance.disks):
9757
      if idx not in self.disks:
9758
        continue
9759

    
9760
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9761

    
9762
      self.cfg.SetDiskID(dev, node_name)
9763

    
9764
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9765
      names = _GenerateUniqueNames(self.lu, lv_names)
9766

    
9767
      vg_data = dev.children[0].logical_id[0]
9768
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9769
                             logical_id=(vg_data, names[0]))
9770
      vg_meta = dev.children[1].logical_id[0]
9771
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9772
                             logical_id=(vg_meta, names[1]))
9773

    
9774
      new_lvs = [lv_data, lv_meta]
9775
      old_lvs = [child.Copy() for child in dev.children]
9776
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9777

    
9778
      # we pass force_create=True to force the LVM creation
9779
      for new_lv in new_lvs:
9780
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9781
                        _GetInstanceInfoText(self.instance), False)
9782

    
9783
    return iv_names
9784

    
9785
  def _CheckDevices(self, node_name, iv_names):
9786
    for name, (dev, _, _) in iv_names.iteritems():
9787
      self.cfg.SetDiskID(dev, node_name)
9788

    
9789
      result = self.rpc.call_blockdev_find(node_name, dev)
9790

    
9791
      msg = result.fail_msg
9792
      if msg or not result.payload:
9793
        if not msg:
9794
          msg = "disk not found"
9795
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9796
                                 (name, msg))
9797

    
9798
      if result.payload.is_degraded:
9799
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9800

    
9801
  def _RemoveOldStorage(self, node_name, iv_names):
9802
    for name, (_, old_lvs, _) in iv_names.iteritems():
9803
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9804

    
9805
      for lv in old_lvs:
9806
        self.cfg.SetDiskID(lv, node_name)
9807

    
9808
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9809
        if msg:
9810
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9811
                             hint="remove unused LVs manually")
9812

    
9813
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9814
    """Replace a disk on the primary or secondary for DRBD 8.
9815

9816
    The algorithm for replace is quite complicated:
9817

9818
      1. for each disk to be replaced:
9819

9820
        1. create new LVs on the target node with unique names
9821
        1. detach old LVs from the drbd device
9822
        1. rename old LVs to name_replaced.<time_t>
9823
        1. rename new LVs to old LVs
9824
        1. attach the new LVs (with the old names now) to the drbd device
9825

9826
      1. wait for sync across all devices
9827

9828
      1. for each modified disk:
9829

9830
        1. remove old LVs (which have the name name_replaces.<time_t>)
9831

9832
    Failures are not very well handled.
9833

9834
    """
9835
    steps_total = 6
9836

    
9837
    # Step: check device activation
9838
    self.lu.LogStep(1, steps_total, "Check device existence")
9839
    self._CheckDisksExistence([self.other_node, self.target_node])
9840
    self._CheckVolumeGroup([self.target_node, self.other_node])
9841

    
9842
    # Step: check other node consistency
9843
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9844
    self._CheckDisksConsistency(self.other_node,
9845
                                self.other_node == self.instance.primary_node,
9846
                                False)
9847

    
9848
    # Step: create new storage
9849
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9850
    iv_names = self._CreateNewStorage(self.target_node)
9851

    
9852
    # Step: for each lv, detach+rename*2+attach
9853
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9854
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9855
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9856

    
9857
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9858
                                                     old_lvs)
9859
      result.Raise("Can't detach drbd from local storage on node"
9860
                   " %s for device %s" % (self.target_node, dev.iv_name))
9861
      #dev.children = []
9862
      #cfg.Update(instance)
9863

    
9864
      # ok, we created the new LVs, so now we know we have the needed
9865
      # storage; as such, we proceed on the target node to rename
9866
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9867
      # using the assumption that logical_id == physical_id (which in
9868
      # turn is the unique_id on that node)
9869

    
9870
      # FIXME(iustin): use a better name for the replaced LVs
9871
      temp_suffix = int(time.time())
9872
      ren_fn = lambda d, suff: (d.physical_id[0],
9873
                                d.physical_id[1] + "_replaced-%s" % suff)
9874

    
9875
      # Build the rename list based on what LVs exist on the node
9876
      rename_old_to_new = []
9877
      for to_ren in old_lvs:
9878
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9879
        if not result.fail_msg and result.payload:
9880
          # device exists
9881
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9882

    
9883
      self.lu.LogInfo("Renaming the old LVs on the target node")
9884
      result = self.rpc.call_blockdev_rename(self.target_node,
9885
                                             rename_old_to_new)
9886
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9887

    
9888
      # Now we rename the new LVs to the old LVs
9889
      self.lu.LogInfo("Renaming the new LVs on the target node")
9890
      rename_new_to_old = [(new, old.physical_id)
9891
                           for old, new in zip(old_lvs, new_lvs)]
9892
      result = self.rpc.call_blockdev_rename(self.target_node,
9893
                                             rename_new_to_old)
9894
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9895

    
9896
      # Intermediate steps of in memory modifications
9897
      for old, new in zip(old_lvs, new_lvs):
9898
        new.logical_id = old.logical_id
9899
        self.cfg.SetDiskID(new, self.target_node)
9900

    
9901
      # We need to modify old_lvs so that removal later removes the
9902
      # right LVs, not the newly added ones; note that old_lvs is a
9903
      # copy here
9904
      for disk in old_lvs:
9905
        disk.logical_id = ren_fn(disk, temp_suffix)
9906
        self.cfg.SetDiskID(disk, self.target_node)
9907

    
9908
      # Now that the new lvs have the old name, we can add them to the device
9909
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9910
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9911
                                                  new_lvs)
9912
      msg = result.fail_msg
9913
      if msg:
9914
        for new_lv in new_lvs:
9915
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9916
                                               new_lv).fail_msg
9917
          if msg2:
9918
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9919
                               hint=("cleanup manually the unused logical"
9920
                                     "volumes"))
9921
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9922

    
9923
    cstep = 5
9924
    if self.early_release:
9925
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9926
      cstep += 1
9927
      self._RemoveOldStorage(self.target_node, iv_names)
9928
      # WARNING: we release both node locks here, do not do other RPCs
9929
      # than WaitForSync to the primary node
9930
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9931
                    names=[self.target_node, self.other_node])
9932

    
9933
    # Wait for sync
9934
    # This can fail as the old devices are degraded and _WaitForSync
9935
    # does a combined result over all disks, so we don't check its return value
9936
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9937
    cstep += 1
9938
    _WaitForSync(self.lu, self.instance)
9939

    
9940
    # Check all devices manually
9941
    self._CheckDevices(self.instance.primary_node, iv_names)
9942

    
9943
    # Step: remove old storage
9944
    if not self.early_release:
9945
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9946
      cstep += 1
9947
      self._RemoveOldStorage(self.target_node, iv_names)
9948

    
9949
  def _ExecDrbd8Secondary(self, feedback_fn):
9950
    """Replace the secondary node for DRBD 8.
9951

9952
    The algorithm for replace is quite complicated:
9953
      - for all disks of the instance:
9954
        - create new LVs on the new node with same names
9955
        - shutdown the drbd device on the old secondary
9956
        - disconnect the drbd network on the primary
9957
        - create the drbd device on the new secondary
9958
        - network attach the drbd on the primary, using an artifice:
9959
          the drbd code for Attach() will connect to the network if it
9960
          finds a device which is connected to the good local disks but
9961
          not network enabled
9962
      - wait for sync across all devices
9963
      - remove all disks from the old secondary
9964

9965
    Failures are not very well handled.
9966

9967
    """
9968
    steps_total = 6
9969

    
9970
    pnode = self.instance.primary_node
9971

    
9972
    # Step: check device activation
9973
    self.lu.LogStep(1, steps_total, "Check device existence")
9974
    self._CheckDisksExistence([self.instance.primary_node])
9975
    self._CheckVolumeGroup([self.instance.primary_node])
9976

    
9977
    # Step: check other node consistency
9978
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9979
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9980

    
9981
    # Step: create new storage
9982
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9983
    for idx, dev in enumerate(self.instance.disks):
9984
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9985
                      (self.new_node, idx))
9986
      # we pass force_create=True to force LVM creation
9987
      for new_lv in dev.children:
9988
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9989
                        _GetInstanceInfoText(self.instance), False)
9990

    
9991
    # Step 4: dbrd minors and drbd setups changes
9992
    # after this, we must manually remove the drbd minors on both the
9993
    # error and the success paths
9994
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9995
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9996
                                         for dev in self.instance.disks],
9997
                                        self.instance.name)
9998
    logging.debug("Allocated minors %r", minors)
9999

    
10000
    iv_names = {}
10001
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10002
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10003
                      (self.new_node, idx))
10004
      # create new devices on new_node; note that we create two IDs:
10005
      # one without port, so the drbd will be activated without
10006
      # networking information on the new node at this stage, and one
10007
      # with network, for the latter activation in step 4
10008
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10009
      if self.instance.primary_node == o_node1:
10010
        p_minor = o_minor1
10011
      else:
10012
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10013
        p_minor = o_minor2
10014

    
10015
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10016
                      p_minor, new_minor, o_secret)
10017
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10018
                    p_minor, new_minor, o_secret)
10019

    
10020
      iv_names[idx] = (dev, dev.children, new_net_id)
10021
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10022
                    new_net_id)
10023
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10024
                              logical_id=new_alone_id,
10025
                              children=dev.children,
10026
                              size=dev.size)
10027
      try:
10028
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10029
                              _GetInstanceInfoText(self.instance), False)
10030
      except errors.GenericError:
10031
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10032
        raise
10033

    
10034
    # We have new devices, shutdown the drbd on the old secondary
10035
    for idx, dev in enumerate(self.instance.disks):
10036
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10037
      self.cfg.SetDiskID(dev, self.target_node)
10038
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10039
      if msg:
10040
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10041
                           "node: %s" % (idx, msg),
10042
                           hint=("Please cleanup this device manually as"
10043
                                 " soon as possible"))
10044

    
10045
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10046
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10047
                                               self.instance.disks)[pnode]
10048

    
10049
    msg = result.fail_msg
10050
    if msg:
10051
      # detaches didn't succeed (unlikely)
10052
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10053
      raise errors.OpExecError("Can't detach the disks from the network on"
10054
                               " old node: %s" % (msg,))
10055

    
10056
    # if we managed to detach at least one, we update all the disks of
10057
    # the instance to point to the new secondary
10058
    self.lu.LogInfo("Updating instance configuration")
10059
    for dev, _, new_logical_id in iv_names.itervalues():
10060
      dev.logical_id = new_logical_id
10061
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10062

    
10063
    self.cfg.Update(self.instance, feedback_fn)
10064

    
10065
    # and now perform the drbd attach
10066
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10067
                    " (standalone => connected)")
10068
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10069
                                            self.new_node],
10070
                                           self.node_secondary_ip,
10071
                                           self.instance.disks,
10072
                                           self.instance.name,
10073
                                           False)
10074
    for to_node, to_result in result.items():
10075
      msg = to_result.fail_msg
10076
      if msg:
10077
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10078
                           to_node, msg,
10079
                           hint=("please do a gnt-instance info to see the"
10080
                                 " status of disks"))
10081
    cstep = 5
10082
    if self.early_release:
10083
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10084
      cstep += 1
10085
      self._RemoveOldStorage(self.target_node, iv_names)
10086
      # WARNING: we release all node locks here, do not do other RPCs
10087
      # than WaitForSync to the primary node
10088
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10089
                    names=[self.instance.primary_node,
10090
                           self.target_node,
10091
                           self.new_node])
10092

    
10093
    # Wait for sync
10094
    # This can fail as the old devices are degraded and _WaitForSync
10095
    # does a combined result over all disks, so we don't check its return value
10096
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10097
    cstep += 1
10098
    _WaitForSync(self.lu, self.instance)
10099

    
10100
    # Check all devices manually
10101
    self._CheckDevices(self.instance.primary_node, iv_names)
10102

    
10103
    # Step: remove old storage
10104
    if not self.early_release:
10105
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10106
      self._RemoveOldStorage(self.target_node, iv_names)
10107

    
10108

    
10109
class LURepairNodeStorage(NoHooksLU):
10110
  """Repairs the volume group on a node.
10111

10112
  """
10113
  REQ_BGL = False
10114

    
10115
  def CheckArguments(self):
10116
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10117

    
10118
    storage_type = self.op.storage_type
10119

    
10120
    if (constants.SO_FIX_CONSISTENCY not in
10121
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10122
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10123
                                 " repaired" % storage_type,
10124
                                 errors.ECODE_INVAL)
10125

    
10126
  def ExpandNames(self):
10127
    self.needed_locks = {
10128
      locking.LEVEL_NODE: [self.op.node_name],
10129
      }
10130

    
10131
  def _CheckFaultyDisks(self, instance, node_name):
10132
    """Ensure faulty disks abort the opcode or at least warn."""
10133
    try:
10134
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10135
                                  node_name, True):
10136
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10137
                                   " node '%s'" % (instance.name, node_name),
10138
                                   errors.ECODE_STATE)
10139
    except errors.OpPrereqError, err:
10140
      if self.op.ignore_consistency:
10141
        self.proc.LogWarning(str(err.args[0]))
10142
      else:
10143
        raise
10144

    
10145
  def CheckPrereq(self):
10146
    """Check prerequisites.
10147

10148
    """
10149
    # Check whether any instance on this node has faulty disks
10150
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10151
      if not inst.admin_up:
10152
        continue
10153
      check_nodes = set(inst.all_nodes)
10154
      check_nodes.discard(self.op.node_name)
10155
      for inst_node_name in check_nodes:
10156
        self._CheckFaultyDisks(inst, inst_node_name)
10157

    
10158
  def Exec(self, feedback_fn):
10159
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10160
                (self.op.name, self.op.node_name))
10161

    
10162
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10163
    result = self.rpc.call_storage_execute(self.op.node_name,
10164
                                           self.op.storage_type, st_args,
10165
                                           self.op.name,
10166
                                           constants.SO_FIX_CONSISTENCY)
10167
    result.Raise("Failed to repair storage unit '%s' on %s" %
10168
                 (self.op.name, self.op.node_name))
10169

    
10170

    
10171
class LUNodeEvacuate(NoHooksLU):
10172
  """Evacuates instances off a list of nodes.
10173

10174
  """
10175
  REQ_BGL = False
10176

    
10177
  _MODE2IALLOCATOR = {
10178
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10179
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10180
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10181
    }
10182
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10183
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
10184
          constants.IALLOCATOR_NEVAC_MODES)
10185

    
10186
  def CheckArguments(self):
10187
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10188

    
10189
  def ExpandNames(self):
10190
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10191

    
10192
    if self.op.remote_node is not None:
10193
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10194
      assert self.op.remote_node
10195

    
10196
      if self.op.remote_node == self.op.node_name:
10197
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10198
                                   " secondary node", errors.ECODE_INVAL)
10199

    
10200
      if self.op.mode != constants.NODE_EVAC_SEC:
10201
        raise errors.OpPrereqError("Without the use of an iallocator only"
10202
                                   " secondary instances can be evacuated",
10203
                                   errors.ECODE_INVAL)
10204

    
10205
    # Declare locks
10206
    self.share_locks = _ShareAll()
10207
    self.needed_locks = {
10208
      locking.LEVEL_INSTANCE: [],
10209
      locking.LEVEL_NODEGROUP: [],
10210
      locking.LEVEL_NODE: [],
10211
      }
10212

    
10213
    # Determine nodes (via group) optimistically, needs verification once locks
10214
    # have been acquired
10215
    self.lock_nodes = self._DetermineNodes()
10216

    
10217
  def _DetermineNodes(self):
10218
    """Gets the list of nodes to operate on.
10219

10220
    """
10221
    if self.op.remote_node is None:
10222
      # Iallocator will choose any node(s) in the same group
10223
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10224
    else:
10225
      group_nodes = frozenset([self.op.remote_node])
10226

    
10227
    # Determine nodes to be locked
10228
    return set([self.op.node_name]) | group_nodes
10229

    
10230
  def _DetermineInstances(self):
10231
    """Builds list of instances to operate on.
10232

10233
    """
10234
    assert self.op.mode in constants.NODE_EVAC_MODES
10235

    
10236
    if self.op.mode == constants.NODE_EVAC_PRI:
10237
      # Primary instances only
10238
      inst_fn = _GetNodePrimaryInstances
10239
      assert self.op.remote_node is None, \
10240
        "Evacuating primary instances requires iallocator"
10241
    elif self.op.mode == constants.NODE_EVAC_SEC:
10242
      # Secondary instances only
10243
      inst_fn = _GetNodeSecondaryInstances
10244
    else:
10245
      # All instances
10246
      assert self.op.mode == constants.NODE_EVAC_ALL
10247
      inst_fn = _GetNodeInstances
10248
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10249
      # per instance
10250
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10251
                                 " interface it is not possible to evacuate"
10252
                                 " all instances at once; specify explicitly"
10253
                                 " whether to evacuate primary or secondary"
10254
                                 " instances",
10255
                                 errors.ECODE_INVAL)
10256

    
10257
    return inst_fn(self.cfg, self.op.node_name)
10258

    
10259
  def DeclareLocks(self, level):
10260
    if level == locking.LEVEL_INSTANCE:
10261
      # Lock instances optimistically, needs verification once node and group
10262
      # locks have been acquired
10263
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10264
        set(i.name for i in self._DetermineInstances())
10265

    
10266
    elif level == locking.LEVEL_NODEGROUP:
10267
      # Lock node groups for all potential target nodes optimistically, needs
10268
      # verification once nodes have been acquired
10269
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10270
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10271

    
10272
    elif level == locking.LEVEL_NODE:
10273
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10274

    
10275
  def CheckPrereq(self):
10276
    # Verify locks
10277
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10278
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10279
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10280

    
10281
    need_nodes = self._DetermineNodes()
10282

    
10283
    if not owned_nodes.issuperset(need_nodes):
10284
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10285
                                 " locks were acquired, current nodes are"
10286
                                 " are '%s', used to be '%s'; retry the"
10287
                                 " operation" %
10288
                                 (self.op.node_name,
10289
                                  utils.CommaJoin(need_nodes),
10290
                                  utils.CommaJoin(owned_nodes)),
10291
                                 errors.ECODE_STATE)
10292

    
10293
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10294
    if owned_groups != wanted_groups:
10295
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10296
                               " current groups are '%s', used to be '%s';"
10297
                               " retry the operation" %
10298
                               (utils.CommaJoin(wanted_groups),
10299
                                utils.CommaJoin(owned_groups)))
10300

    
10301
    # Determine affected instances
10302
    self.instances = self._DetermineInstances()
10303
    self.instance_names = [i.name for i in self.instances]
10304

    
10305
    if set(self.instance_names) != owned_instances:
10306
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10307
                               " were acquired, current instances are '%s',"
10308
                               " used to be '%s'; retry the operation" %
10309
                               (self.op.node_name,
10310
                                utils.CommaJoin(self.instance_names),
10311
                                utils.CommaJoin(owned_instances)))
10312

    
10313
    if self.instance_names:
10314
      self.LogInfo("Evacuating instances from node '%s': %s",
10315
                   self.op.node_name,
10316
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10317
    else:
10318
      self.LogInfo("No instances to evacuate from node '%s'",
10319
                   self.op.node_name)
10320

    
10321
    if self.op.remote_node is not None:
10322
      for i in self.instances:
10323
        if i.primary_node == self.op.remote_node:
10324
          raise errors.OpPrereqError("Node %s is the primary node of"
10325
                                     " instance %s, cannot use it as"
10326
                                     " secondary" %
10327
                                     (self.op.remote_node, i.name),
10328
                                     errors.ECODE_INVAL)
10329

    
10330
  def Exec(self, feedback_fn):
10331
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10332

    
10333
    if not self.instance_names:
10334
      # No instances to evacuate
10335
      jobs = []
10336

    
10337
    elif self.op.iallocator is not None:
10338
      # TODO: Implement relocation to other group
10339
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10340
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10341
                       instances=list(self.instance_names))
10342

    
10343
      ial.Run(self.op.iallocator)
10344

    
10345
      if not ial.success:
10346
        raise errors.OpPrereqError("Can't compute node evacuation using"
10347
                                   " iallocator '%s': %s" %
10348
                                   (self.op.iallocator, ial.info),
10349
                                   errors.ECODE_NORES)
10350

    
10351
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10352

    
10353
    elif self.op.remote_node is not None:
10354
      assert self.op.mode == constants.NODE_EVAC_SEC
10355
      jobs = [
10356
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10357
                                        remote_node=self.op.remote_node,
10358
                                        disks=[],
10359
                                        mode=constants.REPLACE_DISK_CHG,
10360
                                        early_release=self.op.early_release)]
10361
        for instance_name in self.instance_names
10362
        ]
10363

    
10364
    else:
10365
      raise errors.ProgrammerError("No iallocator or remote node")
10366

    
10367
    return ResultWithJobs(jobs)
10368

    
10369

    
10370
def _SetOpEarlyRelease(early_release, op):
10371
  """Sets C{early_release} flag on opcodes if available.
10372

10373
  """
10374
  try:
10375
    op.early_release = early_release
10376
  except AttributeError:
10377
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10378

    
10379
  return op
10380

    
10381

    
10382
def _NodeEvacDest(use_nodes, group, nodes):
10383
  """Returns group or nodes depending on caller's choice.
10384

10385
  """
10386
  if use_nodes:
10387
    return utils.CommaJoin(nodes)
10388
  else:
10389
    return group
10390

    
10391

    
10392
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10393
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10394

10395
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10396
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10397

10398
  @type lu: L{LogicalUnit}
10399
  @param lu: Logical unit instance
10400
  @type alloc_result: tuple/list
10401
  @param alloc_result: Result from iallocator
10402
  @type early_release: bool
10403
  @param early_release: Whether to release locks early if possible
10404
  @type use_nodes: bool
10405
  @param use_nodes: Whether to display node names instead of groups
10406

10407
  """
10408
  (moved, failed, jobs) = alloc_result
10409

    
10410
  if failed:
10411
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10412
                                 for (name, reason) in failed)
10413
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10414
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10415

    
10416
  if moved:
10417
    lu.LogInfo("Instances to be moved: %s",
10418
               utils.CommaJoin("%s (to %s)" %
10419
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10420
                               for (name, group, nodes) in moved))
10421

    
10422
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10423
              map(opcodes.OpCode.LoadOpCode, ops))
10424
          for ops in jobs]
10425

    
10426

    
10427
class LUInstanceGrowDisk(LogicalUnit):
10428
  """Grow a disk of an instance.
10429

10430
  """
10431
  HPATH = "disk-grow"
10432
  HTYPE = constants.HTYPE_INSTANCE
10433
  REQ_BGL = False
10434

    
10435
  def ExpandNames(self):
10436
    self._ExpandAndLockInstance()
10437
    self.needed_locks[locking.LEVEL_NODE] = []
10438
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10439

    
10440
  def DeclareLocks(self, level):
10441
    if level == locking.LEVEL_NODE:
10442
      self._LockInstancesNodes()
10443

    
10444
  def BuildHooksEnv(self):
10445
    """Build hooks env.
10446

10447
    This runs on the master, the primary and all the secondaries.
10448

10449
    """
10450
    env = {
10451
      "DISK": self.op.disk,
10452
      "AMOUNT": self.op.amount,
10453
      }
10454
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10455
    return env
10456

    
10457
  def BuildHooksNodes(self):
10458
    """Build hooks nodes.
10459

10460
    """
10461
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10462
    return (nl, nl)
10463

    
10464
  def CheckPrereq(self):
10465
    """Check prerequisites.
10466

10467
    This checks that the instance is in the cluster.
10468

10469
    """
10470
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10471
    assert instance is not None, \
10472
      "Cannot retrieve locked instance %s" % self.op.instance_name
10473
    nodenames = list(instance.all_nodes)
10474
    for node in nodenames:
10475
      _CheckNodeOnline(self, node)
10476

    
10477
    self.instance = instance
10478

    
10479
    if instance.disk_template not in constants.DTS_GROWABLE:
10480
      raise errors.OpPrereqError("Instance's disk layout does not support"
10481
                                 " growing", errors.ECODE_INVAL)
10482

    
10483
    self.disk = instance.FindDisk(self.op.disk)
10484

    
10485
    if instance.disk_template not in (constants.DT_FILE,
10486
                                      constants.DT_SHARED_FILE):
10487
      # TODO: check the free disk space for file, when that feature will be
10488
      # supported
10489
      _CheckNodesFreeDiskPerVG(self, nodenames,
10490
                               self.disk.ComputeGrowth(self.op.amount))
10491

    
10492
  def Exec(self, feedback_fn):
10493
    """Execute disk grow.
10494

10495
    """
10496
    instance = self.instance
10497
    disk = self.disk
10498

    
10499
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10500
    if not disks_ok:
10501
      raise errors.OpExecError("Cannot activate block device to grow")
10502

    
10503
    # First run all grow ops in dry-run mode
10504
    for node in instance.all_nodes:
10505
      self.cfg.SetDiskID(disk, node)
10506
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10507
      result.Raise("Grow request failed to node %s" % node)
10508

    
10509
    # We know that (as far as we can test) operations across different
10510
    # nodes will succeed, time to run it for real
10511
    for node in instance.all_nodes:
10512
      self.cfg.SetDiskID(disk, node)
10513
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10514
      result.Raise("Grow request failed to node %s" % node)
10515

    
10516
      # TODO: Rewrite code to work properly
10517
      # DRBD goes into sync mode for a short amount of time after executing the
10518
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10519
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10520
      # time is a work-around.
10521
      time.sleep(5)
10522

    
10523
    disk.RecordGrow(self.op.amount)
10524
    self.cfg.Update(instance, feedback_fn)
10525
    if self.op.wait_for_sync:
10526
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10527
      if disk_abort:
10528
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10529
                             " status; please check the instance")
10530
      if not instance.admin_up:
10531
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10532
    elif not instance.admin_up:
10533
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10534
                           " not supposed to be running because no wait for"
10535
                           " sync mode was requested")
10536

    
10537

    
10538
class LUInstanceQueryData(NoHooksLU):
10539
  """Query runtime instance data.
10540

10541
  """
10542
  REQ_BGL = False
10543

    
10544
  def ExpandNames(self):
10545
    self.needed_locks = {}
10546

    
10547
    # Use locking if requested or when non-static information is wanted
10548
    if not (self.op.static or self.op.use_locking):
10549
      self.LogWarning("Non-static data requested, locks need to be acquired")
10550
      self.op.use_locking = True
10551

    
10552
    if self.op.instances or not self.op.use_locking:
10553
      # Expand instance names right here
10554
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10555
    else:
10556
      # Will use acquired locks
10557
      self.wanted_names = None
10558

    
10559
    if self.op.use_locking:
10560
      self.share_locks = _ShareAll()
10561

    
10562
      if self.wanted_names is None:
10563
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10564
      else:
10565
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10566

    
10567
      self.needed_locks[locking.LEVEL_NODEGROUP] = []
10568
      self.needed_locks[locking.LEVEL_NODE] = []
10569
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10570

    
10571
  def DeclareLocks(self, level):
10572
    if self.op.use_locking:
10573
      if level == locking.LEVEL_NODEGROUP:
10574
        owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10575

    
10576
        # Lock all groups used by instances optimistically; this requires going
10577
        # via the node before it's locked, requiring verification later on
10578
        self.needed_locks[locking.LEVEL_NODEGROUP] = \
10579
          frozenset(group_uuid
10580
                    for instance_name in owned_instances
10581
                    for group_uuid in
10582
                      self.cfg.GetInstanceNodeGroups(instance_name))
10583

    
10584
      elif level == locking.LEVEL_NODE:
10585
        self._LockInstancesNodes()
10586

    
10587
  def CheckPrereq(self):
10588
    """Check prerequisites.
10589

10590
    This only checks the optional instance list against the existing names.
10591

10592
    """
10593
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
10594
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
10595
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
10596

    
10597
    if self.wanted_names is None:
10598
      assert self.op.use_locking, "Locking was not used"
10599
      self.wanted_names = owned_instances
10600

    
10601
    instances = dict(self.cfg.GetMultiInstanceInfo(self.wanted_names))
10602

    
10603
    if self.op.use_locking:
10604
      _CheckInstancesNodeGroups(self.cfg, instances, owned_groups, owned_nodes,
10605
                                None)
10606
    else:
10607
      assert not (owned_instances or owned_groups or owned_nodes)
10608

    
10609
    self.wanted_instances = instances.values()
10610

    
10611
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10612
    """Returns the status of a block device
10613

10614
    """
10615
    if self.op.static or not node:
10616
      return None
10617

    
10618
    self.cfg.SetDiskID(dev, node)
10619

    
10620
    result = self.rpc.call_blockdev_find(node, dev)
10621
    if result.offline:
10622
      return None
10623

    
10624
    result.Raise("Can't compute disk status for %s" % instance_name)
10625

    
10626
    status = result.payload
10627
    if status is None:
10628
      return None
10629

    
10630
    return (status.dev_path, status.major, status.minor,
10631
            status.sync_percent, status.estimated_time,
10632
            status.is_degraded, status.ldisk_status)
10633

    
10634
  def _ComputeDiskStatus(self, instance, snode, dev):
10635
    """Compute block device status.
10636

10637
    """
10638
    if dev.dev_type in constants.LDS_DRBD:
10639
      # we change the snode then (otherwise we use the one passed in)
10640
      if dev.logical_id[0] == instance.primary_node:
10641
        snode = dev.logical_id[1]
10642
      else:
10643
        snode = dev.logical_id[0]
10644

    
10645
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10646
                                              instance.name, dev)
10647
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10648

    
10649
    if dev.children:
10650
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10651
                                        instance, snode),
10652
                         dev.children)
10653
    else:
10654
      dev_children = []
10655

    
10656
    return {
10657
      "iv_name": dev.iv_name,
10658
      "dev_type": dev.dev_type,
10659
      "logical_id": dev.logical_id,
10660
      "physical_id": dev.physical_id,
10661
      "pstatus": dev_pstatus,
10662
      "sstatus": dev_sstatus,
10663
      "children": dev_children,
10664
      "mode": dev.mode,
10665
      "size": dev.size,
10666
      }
10667

    
10668
  def Exec(self, feedback_fn):
10669
    """Gather and return data"""
10670
    result = {}
10671

    
10672
    cluster = self.cfg.GetClusterInfo()
10673

    
10674
    node_names = itertools.chain(*(i.all_nodes for i in self.wanted_instances))
10675
    nodes = dict(self.cfg.GetMultiNodeInfo(node_names))
10676

    
10677
    groups = dict(self.cfg.GetMultiNodeGroupInfo(node.group
10678
                                                 for node in nodes.values()))
10679

    
10680
    group2name_fn = lambda uuid: groups[uuid].name
10681

    
10682
    for instance in self.wanted_instances:
10683
      pnode = nodes[instance.primary_node]
10684

    
10685
      if self.op.static or pnode.offline:
10686
        remote_state = None
10687
        if pnode.offline:
10688
          self.LogWarning("Primary node %s is marked offline, returning static"
10689
                          " information only for instance %s" %
10690
                          (pnode.name, instance.name))
10691
      else:
10692
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10693
                                                  instance.name,
10694
                                                  instance.hypervisor)
10695
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10696
        remote_info = remote_info.payload
10697
        if remote_info and "state" in remote_info:
10698
          remote_state = "up"
10699
        else:
10700
          remote_state = "down"
10701

    
10702
      if instance.admin_up:
10703
        config_state = "up"
10704
      else:
10705
        config_state = "down"
10706

    
10707
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10708
                  instance.disks)
10709

    
10710
      snodes_group_uuids = [nodes[snode_name].group
10711
                            for snode_name in instance.secondary_nodes]
10712

    
10713
      result[instance.name] = {
10714
        "name": instance.name,
10715
        "config_state": config_state,
10716
        "run_state": remote_state,
10717
        "pnode": instance.primary_node,
10718
        "pnode_group_uuid": pnode.group,
10719
        "pnode_group_name": group2name_fn(pnode.group),
10720
        "snodes": instance.secondary_nodes,
10721
        "snodes_group_uuids": snodes_group_uuids,
10722
        "snodes_group_names": map(group2name_fn, snodes_group_uuids),
10723
        "os": instance.os,
10724
        # this happens to be the same format used for hooks
10725
        "nics": _NICListToTuple(self, instance.nics),
10726
        "disk_template": instance.disk_template,
10727
        "disks": disks,
10728
        "hypervisor": instance.hypervisor,
10729
        "network_port": instance.network_port,
10730
        "hv_instance": instance.hvparams,
10731
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10732
        "be_instance": instance.beparams,
10733
        "be_actual": cluster.FillBE(instance),
10734
        "os_instance": instance.osparams,
10735
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10736
        "serial_no": instance.serial_no,
10737
        "mtime": instance.mtime,
10738
        "ctime": instance.ctime,
10739
        "uuid": instance.uuid,
10740
        }
10741

    
10742
    return result
10743

    
10744

    
10745
class LUInstanceSetParams(LogicalUnit):
10746
  """Modifies an instances's parameters.
10747

10748
  """
10749
  HPATH = "instance-modify"
10750
  HTYPE = constants.HTYPE_INSTANCE
10751
  REQ_BGL = False
10752

    
10753
  def CheckArguments(self):
10754
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10755
            self.op.hvparams or self.op.beparams or self.op.os_name):
10756
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10757

    
10758
    if self.op.hvparams:
10759
      _CheckGlobalHvParams(self.op.hvparams)
10760

    
10761
    # Disk validation
10762
    disk_addremove = 0
10763
    for disk_op, disk_dict in self.op.disks:
10764
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10765
      if disk_op == constants.DDM_REMOVE:
10766
        disk_addremove += 1
10767
        continue
10768
      elif disk_op == constants.DDM_ADD:
10769
        disk_addremove += 1
10770
      else:
10771
        if not isinstance(disk_op, int):
10772
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10773
        if not isinstance(disk_dict, dict):
10774
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10775
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10776

    
10777
      if disk_op == constants.DDM_ADD:
10778
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10779
        if mode not in constants.DISK_ACCESS_SET:
10780
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10781
                                     errors.ECODE_INVAL)
10782
        size = disk_dict.get(constants.IDISK_SIZE, None)
10783
        if size is None:
10784
          raise errors.OpPrereqError("Required disk parameter size missing",
10785
                                     errors.ECODE_INVAL)
10786
        try:
10787
          size = int(size)
10788
        except (TypeError, ValueError), err:
10789
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10790
                                     str(err), errors.ECODE_INVAL)
10791
        disk_dict[constants.IDISK_SIZE] = size
10792
      else:
10793
        # modification of disk
10794
        if constants.IDISK_SIZE in disk_dict:
10795
          raise errors.OpPrereqError("Disk size change not possible, use"
10796
                                     " grow-disk", errors.ECODE_INVAL)
10797

    
10798
    if disk_addremove > 1:
10799
      raise errors.OpPrereqError("Only one disk add or remove operation"
10800
                                 " supported at a time", errors.ECODE_INVAL)
10801

    
10802
    if self.op.disks and self.op.disk_template is not None:
10803
      raise errors.OpPrereqError("Disk template conversion and other disk"
10804
                                 " changes not supported at the same time",
10805
                                 errors.ECODE_INVAL)
10806

    
10807
    if (self.op.disk_template and
10808
        self.op.disk_template in constants.DTS_INT_MIRROR and
10809
        self.op.remote_node is None):
10810
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10811
                                 " one requires specifying a secondary node",
10812
                                 errors.ECODE_INVAL)
10813

    
10814
    # NIC validation
10815
    nic_addremove = 0
10816
    for nic_op, nic_dict in self.op.nics:
10817
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10818
      if nic_op == constants.DDM_REMOVE:
10819
        nic_addremove += 1
10820
        continue
10821
      elif nic_op == constants.DDM_ADD:
10822
        nic_addremove += 1
10823
      else:
10824
        if not isinstance(nic_op, int):
10825
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10826
        if not isinstance(nic_dict, dict):
10827
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10828
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10829

    
10830
      # nic_dict should be a dict
10831
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10832
      if nic_ip is not None:
10833
        if nic_ip.lower() == constants.VALUE_NONE:
10834
          nic_dict[constants.INIC_IP] = None
10835
        else:
10836
          if not netutils.IPAddress.IsValid(nic_ip):
10837
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10838
                                       errors.ECODE_INVAL)
10839

    
10840
      nic_bridge = nic_dict.get("bridge", None)
10841
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10842
      if nic_bridge and nic_link:
10843
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10844
                                   " at the same time", errors.ECODE_INVAL)
10845
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10846
        nic_dict["bridge"] = None
10847
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10848
        nic_dict[constants.INIC_LINK] = None
10849

    
10850
      if nic_op == constants.DDM_ADD:
10851
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10852
        if nic_mac is None:
10853
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10854

    
10855
      if constants.INIC_MAC in nic_dict:
10856
        nic_mac = nic_dict[constants.INIC_MAC]
10857
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10858
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10859

    
10860
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10861
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10862
                                     " modifying an existing nic",
10863
                                     errors.ECODE_INVAL)
10864

    
10865
    if nic_addremove > 1:
10866
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10867
                                 " supported at a time", errors.ECODE_INVAL)
10868

    
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
10882
    """Build hooks env.
10883

10884
    This runs on the master, primary and secondaries.
10885

10886
    """
10887
    args = dict()
10888
    if constants.BE_MEMORY in self.be_new:
10889
      args["memory"] = self.be_new[constants.BE_MEMORY]
10890
    if constants.BE_VCPUS in self.be_new:
10891
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10892
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10893
    # information at all.
10894
    if self.op.nics:
10895
      args["nics"] = []
10896
      nic_override = dict(self.op.nics)
10897
      for idx, nic in enumerate(self.instance.nics):
10898
        if idx in nic_override:
10899
          this_nic_override = nic_override[idx]
10900
        else:
10901
          this_nic_override = {}
10902
        if constants.INIC_IP in this_nic_override:
10903
          ip = this_nic_override[constants.INIC_IP]
10904
        else:
10905
          ip = nic.ip
10906
        if constants.INIC_MAC in this_nic_override:
10907
          mac = this_nic_override[constants.INIC_MAC]
10908
        else:
10909
          mac = nic.mac
10910
        if idx in self.nic_pnew:
10911
          nicparams = self.nic_pnew[idx]
10912
        else:
10913
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10914
        mode = nicparams[constants.NIC_MODE]
10915
        link = nicparams[constants.NIC_LINK]
10916
        args["nics"].append((ip, mac, mode, link))
10917
      if constants.DDM_ADD in nic_override:
10918
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10919
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10920
        nicparams = self.nic_pnew[constants.DDM_ADD]
10921
        mode = nicparams[constants.NIC_MODE]
10922
        link = nicparams[constants.NIC_LINK]
10923
        args["nics"].append((ip, mac, mode, link))
10924
      elif constants.DDM_REMOVE in nic_override:
10925
        del args["nics"][-1]
10926

    
10927
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10928
    if self.op.disk_template:
10929
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10930

    
10931
    return env
10932

    
10933
  def BuildHooksNodes(self):
10934
    """Build hooks nodes.
10935

10936
    """
10937
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10938
    return (nl, nl)
10939

    
10940
  def CheckPrereq(self):
10941
    """Check prerequisites.
10942

10943
    This only checks the instance list against the existing names.
10944

10945
    """
10946
    # checking the new params on the primary/secondary nodes
10947

    
10948
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10949
    cluster = self.cluster = self.cfg.GetClusterInfo()
10950
    assert self.instance is not None, \
10951
      "Cannot retrieve locked instance %s" % self.op.instance_name
10952
    pnode = instance.primary_node
10953
    nodelist = list(instance.all_nodes)
10954

    
10955
    # OS change
10956
    if self.op.os_name and not self.op.force:
10957
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10958
                      self.op.force_variant)
10959
      instance_os = self.op.os_name
10960
    else:
10961
      instance_os = instance.os
10962

    
10963
    if self.op.disk_template:
10964
      if instance.disk_template == self.op.disk_template:
10965
        raise errors.OpPrereqError("Instance already has disk template %s" %
10966
                                   instance.disk_template, errors.ECODE_INVAL)
10967

    
10968
      if (instance.disk_template,
10969
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10970
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10971
                                   " %s to %s" % (instance.disk_template,
10972
                                                  self.op.disk_template),
10973
                                   errors.ECODE_INVAL)
10974
      _CheckInstanceDown(self, instance, "cannot change disk template")
10975
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10976
        if self.op.remote_node == pnode:
10977
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10978
                                     " as the primary node of the instance" %
10979
                                     self.op.remote_node, errors.ECODE_STATE)
10980
        _CheckNodeOnline(self, self.op.remote_node)
10981
        _CheckNodeNotDrained(self, self.op.remote_node)
10982
        # FIXME: here we assume that the old instance type is DT_PLAIN
10983
        assert instance.disk_template == constants.DT_PLAIN
10984
        disks = [{constants.IDISK_SIZE: d.size,
10985
                  constants.IDISK_VG: d.logical_id[0]}
10986
                 for d in instance.disks]
10987
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10988
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10989

    
10990
    # hvparams processing
10991
    if self.op.hvparams:
10992
      hv_type = instance.hypervisor
10993
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10994
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10995
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10996

    
10997
      # local check
10998
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10999
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11000
      self.hv_new = hv_new # the new actual values
11001
      self.hv_inst = i_hvdict # the new dict (without defaults)
11002
    else:
11003
      self.hv_new = self.hv_inst = {}
11004

    
11005
    # beparams processing
11006
    if self.op.beparams:
11007
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11008
                                   use_none=True)
11009
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11010
      be_new = cluster.SimpleFillBE(i_bedict)
11011
      self.be_new = be_new # the new actual values
11012
      self.be_inst = i_bedict # the new dict (without defaults)
11013
    else:
11014
      self.be_new = self.be_inst = {}
11015
    be_old = cluster.FillBE(instance)
11016

    
11017
    # osparams processing
11018
    if self.op.osparams:
11019
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11020
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11021
      self.os_inst = i_osdict # the new dict (without defaults)
11022
    else:
11023
      self.os_inst = {}
11024

    
11025
    self.warn = []
11026

    
11027
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11028
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11029
      mem_check_list = [pnode]
11030
      if be_new[constants.BE_AUTO_BALANCE]:
11031
        # either we changed auto_balance to yes or it was from before
11032
        mem_check_list.extend(instance.secondary_nodes)
11033
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11034
                                                  instance.hypervisor)
11035
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11036
                                         instance.hypervisor)
11037
      pninfo = nodeinfo[pnode]
11038
      msg = pninfo.fail_msg
11039
      if msg:
11040
        # Assume the primary node is unreachable and go ahead
11041
        self.warn.append("Can't get info from primary node %s: %s" %
11042
                         (pnode, msg))
11043
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11044
        self.warn.append("Node data from primary node %s doesn't contain"
11045
                         " free memory information" % pnode)
11046
      elif instance_info.fail_msg:
11047
        self.warn.append("Can't get instance runtime information: %s" %
11048
                        instance_info.fail_msg)
11049
      else:
11050
        if instance_info.payload:
11051
          current_mem = int(instance_info.payload["memory"])
11052
        else:
11053
          # Assume instance not running
11054
          # (there is a slight race condition here, but it's not very probable,
11055
          # and we have no other way to check)
11056
          current_mem = 0
11057
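        # memory still missing on the primary node once the instance's current
        # allocation and the node's free memory are taken into account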
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11058
                    pninfo.payload["memory_free"])
11059
        if miss_mem > 0:
11060
          raise errors.OpPrereqError("This change will prevent the instance"
11061
                                     " from starting, due to %d MB of memory"
11062
                                     " missing on its primary node" % miss_mem,
11063
                                     errors.ECODE_NORES)
11064

    
11065
      if be_new[constants.BE_AUTO_BALANCE]:
11066
        for node, nres in nodeinfo.items():
11067
          if node not in instance.secondary_nodes:
11068
            continue
11069
          nres.Raise("Can't get info from secondary node %s" % node,
11070
                     prereq=True, ecode=errors.ECODE_STATE)
11071
          if not isinstance(nres.payload.get("memory_free", None), int):
11072
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11073
                                       " memory information" % node,
11074
                                       errors.ECODE_STATE)
11075
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11076
            raise errors.OpPrereqError("This change will prevent the instance"
11077
                                       " from failover to its secondary node"
11078
                                       " %s, due to not enough memory" % node,
11079
                                       errors.ECODE_STATE)
11080

    
11081
    # NIC processing
11082
    self.nic_pnew = {}
11083
    self.nic_pinst = {}
11084
    for nic_op, nic_dict in self.op.nics:
11085
      if nic_op == constants.DDM_REMOVE:
11086
        if not instance.nics:
11087
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11088
                                     errors.ECODE_INVAL)
11089
        continue
11090
      if nic_op != constants.DDM_ADD:
11091
        # an existing nic
11092
        if not instance.nics:
11093
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11094
                                     " no NICs" % nic_op,
11095
                                     errors.ECODE_INVAL)
11096
        if nic_op < 0 or nic_op >= len(instance.nics):
11097
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11098
                                     " are 0 to %d" %
11099
                                     (nic_op, len(instance.nics) - 1),
11100
                                     errors.ECODE_INVAL)
11101
        old_nic_params = instance.nics[nic_op].nicparams
11102
        old_nic_ip = instance.nics[nic_op].ip
11103
      else:
11104
        old_nic_params = {}
11105
        old_nic_ip = None
11106

    
11107
      update_params_dict = dict([(key, nic_dict[key])
11108
                                 for key in constants.NICS_PARAMETERS
11109
                                 if key in nic_dict])
11110

    
11111
      if "bridge" in nic_dict:
11112
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11113

    
11114
      new_nic_params = _GetUpdatedParams(old_nic_params,
11115
                                         update_params_dict)
11116
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11117
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11118
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11119
      self.nic_pinst[nic_op] = new_nic_params
11120
      self.nic_pnew[nic_op] = new_filled_nic_params
11121
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11122

    
11123
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11124
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11125
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11126
        if msg:
11127
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11128
          if self.op.force:
11129
            self.warn.append(msg)
11130
          else:
11131
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11132
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11133
        if constants.INIC_IP in nic_dict:
11134
          nic_ip = nic_dict[constants.INIC_IP]
11135
        else:
11136
          nic_ip = old_nic_ip
11137
        if nic_ip is None:
11138
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11139
                                     " on a routed nic", errors.ECODE_INVAL)
11140
      if constants.INIC_MAC in nic_dict:
11141
        nic_mac = nic_dict[constants.INIC_MAC]
11142
        if nic_mac is None:
11143
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11144
                                     errors.ECODE_INVAL)
11145
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11146
          # otherwise generate the mac
11147
          nic_dict[constants.INIC_MAC] = \
11148
            self.cfg.GenerateMAC(self.proc.GetECId())
11149
        else:
11150
          # or validate/reserve the current one
11151
          try:
11152
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11153
          except errors.ReservationError:
11154
            raise errors.OpPrereqError("MAC address %s already in use"
11155
                                       " in cluster" % nic_mac,
11156
                                       errors.ECODE_NOTUNIQUE)
11157

    
    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
11186
    """Converts an instance from plain to drbd.
11187

11188
    """
11189
    feedback_fn("Converting template to drbd")
11190
    instance = self.instance
11191
    pnode = instance.primary_node
11192
    snode = self.op.remote_node
11193

    
    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    
11218
    feedback_fn("Initializing DRBD devices...")
11219
    # all child devices are in place, we can now create the DRBD devices
11220
    for disk in new_disks:
11221
      for node in [pnode, snode]:
11222
        f_create = node == pnode
11223
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11224

    
11225
    # at this point, the instance has been modified
11226
    instance.disk_template = constants.DT_DRBD8
11227
    instance.disks = new_disks
11228
    self.cfg.Update(instance, feedback_fn)
11229

    
11230
    # disks are created, waiting for sync
11231
    disk_abort = not _WaitForSync(self, instance,
11232
                                  oneshot=not self.op.wait_for_sync)
11233
    if disk_abort:
11234
      raise errors.OpExecError("There are some degraded disks for"
11235
                               " this instance, please cleanup manually")
11236

    
11237
  def _ConvertDrbdToPlain(self, feedback_fn):
11238
    """Converts an instance from drbd to plain.
11239

11240
    """
11241
    instance = self.instance
11242
    assert len(instance.secondary_nodes) == 1
11243
    pnode = instance.primary_node
11244
    snode = instance.secondary_nodes[0]
11245
    feedback_fn("Converting template to plain")
11246

    
11247
    old_disks = instance.disks
11248
    new_disks = [d.children[0] for d in old_disks]
11249

    
11250
    # copy over size and mode
11251
    for parent, child in zip(old_disks, new_disks):
11252
      child.size = parent.size
11253
      child.mode = parent.mode
11254

    
11255
    # update instance structure
11256
    instance.disks = new_disks
11257
    instance.disk_template = constants.DT_PLAIN
11258
    self.cfg.Update(instance, feedback_fn)
11259

    
11260
    feedback_fn("Removing volumes on the secondary node...")
11261
    for disk in old_disks:
11262
      self.cfg.SetDiskID(disk, snode)
11263
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11264
      if msg:
11265
        self.LogWarning("Could not remove block device %s on node %s,"
11266
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11267

    
11268
    feedback_fn("Removing unneeded volumes on the primary node...")
11269
    for idx, disk in enumerate(old_disks):
11270
      meta = disk.children[1]
11271
      self.cfg.SetDiskID(meta, pnode)
11272
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11273
      if msg:
11274
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11275
                        " continuing anyway: %s", idx, pnode, msg)
11276

    
11277
    # this is a DRBD disk, return its port to the pool
11278
    for disk in old_disks:
11279
      tcp_port = disk.logical_id[2]
11280
      self.cfg.AddTcpUdpPort(tcp_port)
11281

    
11282
  def Exec(self, feedback_fn):
11283
    """Modifies an instance.
11284

11285
    All parameters take effect only at the next restart of the instance.
11286

11287
    """
11288
    # Process here the warnings from CheckPrereq, as we don't have a
11289
    # feedback_fn there.
11290
    for warn in self.warn:
11291
      feedback_fn("WARNING: %s" % warn)
11292

    
11293
    result = []
11294
    instance = self.instance
11295
    # disk changes
11296
    for disk_op, disk_dict in self.op.disks:
11297
      if disk_op == constants.DDM_REMOVE:
11298
        # remove the last disk
11299
        device = instance.disks.pop()
11300
        device_idx = len(instance.disks)
11301
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11302
          self.cfg.SetDiskID(disk, node)
11303
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11304
          if msg:
11305
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11306
                            " continuing anyway", device_idx, node, msg)
11307
        result.append(("disk/%d" % device_idx, "remove"))
11308

    
11309
        # if this is a DRBD disk, return its port to the pool
11310
        if device.dev_type in constants.LDS_DRBD:
11311
          tcp_port = device.logical_id[2]
11312
          self.cfg.AddTcpUdpPort(tcp_port)
11313
      elif disk_op == constants.DDM_ADD:
11314
        # add a new disk
11315
        if instance.disk_template in (constants.DT_FILE,
11316
                                        constants.DT_SHARED_FILE):
11317
          file_driver, file_path = instance.disks[0].logical_id
11318
          file_path = os.path.dirname(file_path)
11319
        else:
11320
          file_driver = file_path = None
11321
        disk_idx_base = len(instance.disks)
11322
        new_disk = _GenerateDiskTemplate(self,
11323
                                         instance.disk_template,
11324
                                         instance.name, instance.primary_node,
11325
                                         instance.secondary_nodes,
11326
                                         [disk_dict],
11327
                                         file_path,
11328
                                         file_driver,
11329
                                         disk_idx_base, feedback_fn)[0]
11330
        instance.disks.append(new_disk)
11331
        info = _GetInstanceInfoText(instance)
11332

    
11333
        logging.info("Creating volume %s for instance %s",
11334
                     new_disk.iv_name, instance.name)
11335
        # Note: this needs to be kept in sync with _CreateDisks
11336
        #HARDCODE
11337
        for node in instance.all_nodes:
11338
          f_create = node == instance.primary_node
11339
          try:
11340
            _CreateBlockDev(self, node, instance, new_disk,
11341
                            f_create, info, f_create)
11342
          except errors.OpExecError, err:
11343
            self.LogWarning("Failed to create volume %s (%s) on"
11344
                            " node %s: %s",
11345
                            new_disk.iv_name, new_disk, node, err)
11346
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11347
                       (new_disk.size, new_disk.mode)))
11348
      else:
11349
        # change a given disk
11350
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11351
        result.append(("disk.mode/%d" % disk_op,
11352
                       disk_dict[constants.IDISK_MODE]))
11353

    
11354
    if self.op.disk_template:
11355
      r_shut = _ShutdownInstanceDisks(self, instance)
11356
      if not r_shut:
11357
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11358
                                 " proceed with disk template conversion")
11359
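      # look up and run the conversion routine registered for this
      # (old template, new template) pair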
      mode = (instance.disk_template, self.op.disk_template)
11360
      try:
11361
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11362
      except:
11363
        self.cfg.ReleaseDRBDMinors(instance.name)
11364
        raise
11365
      result.append(("disk_template", self.op.disk_template))
11366

    
11367
    # NIC changes
11368
    for nic_op, nic_dict in self.op.nics:
11369
      if nic_op == constants.DDM_REMOVE:
11370
        # remove the last nic
11371
        del instance.nics[-1]
11372
        result.append(("nic.%d" % len(instance.nics), "remove"))
11373
      elif nic_op == constants.DDM_ADD:
11374
        # mac and bridge should be set, by now
11375
        mac = nic_dict[constants.INIC_MAC]
11376
        ip = nic_dict.get(constants.INIC_IP, None)
11377
        nicparams = self.nic_pinst[constants.DDM_ADD]
11378
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11379
        instance.nics.append(new_nic)
11380
        result.append(("nic.%d" % (len(instance.nics) - 1),
11381
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11382
                       (new_nic.mac, new_nic.ip,
11383
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11384
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11385
                       )))
11386
      else:
11387
        for key in (constants.INIC_MAC, constants.INIC_IP):
11388
          if key in nic_dict:
11389
            setattr(instance.nics[nic_op], key, nic_dict[key])
11390
        if nic_op in self.nic_pinst:
11391
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11392
        for key, val in nic_dict.iteritems():
11393
          result.append(("nic.%s/%d" % (key, nic_op), val))
11394

    
11395
    # hvparams changes
11396
    if self.op.hvparams:
11397
      instance.hvparams = self.hv_inst
11398
      for key, val in self.op.hvparams.iteritems():
11399
        result.append(("hv/%s" % key, val))
11400

    
11401
    # beparams changes
11402
    if self.op.beparams:
11403
      instance.beparams = self.be_inst
11404
      for key, val in self.op.beparams.iteritems():
11405
        result.append(("be/%s" % key, val))
11406

    
11407
    # OS change
11408
    if self.op.os_name:
11409
      instance.os = self.op.os_name
11410

    
11411
    # osparams changes
11412
    if self.op.osparams:
11413
      instance.osparams = self.os_inst
11414
      for key, val in self.op.osparams.iteritems():
11415
        result.append(("os/%s" % key, val))
11416

    
11417
    self.cfg.Update(instance, feedback_fn)
11418

    
11419
    return result
11420

    
11421
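  # maps (current disk template, requested disk template) to the method
  # implementing that conversion; only plain<->drbd8 is supported here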
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
11428
  HPATH = "instance-change-group"
11429
  HTYPE = constants.HTYPE_INSTANCE
11430
  REQ_BGL = False
11431

    
11432
  def ExpandNames(self):
11433
    self.share_locks = _ShareAll()
11434
    self.needed_locks = {
11435
      locking.LEVEL_NODEGROUP: [],
11436
      locking.LEVEL_NODE: [],
11437
      }
11438

    
11439
    self._ExpandAndLockInstance()
11440

    
11441
    if self.op.target_groups:
11442
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11443
                                  self.op.target_groups)
11444
    else:
11445
      self.req_target_uuids = None
11446

    
11447
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11448

    
11449
  def DeclareLocks(self, level):
11450
    if level == locking.LEVEL_NODEGROUP:
11451
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11452

    
11453
      if self.req_target_uuids:
11454
        lock_groups = set(self.req_target_uuids)
11455

    
11456
        # Lock all groups used by instance optimistically; this requires going
11457
        # via the node before it's locked, requiring verification later on
11458
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11459
        lock_groups.update(instance_groups)
11460
      else:
11461
        # No target groups, need to lock all of them
11462
        lock_groups = locking.ALL_SET
11463

    
11464
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11465

    
11466
    elif level == locking.LEVEL_NODE:
11467
      if self.req_target_uuids:
11468
        # Lock all nodes used by instances
11469
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11470
        self._LockInstancesNodes()
11471

    
11472
        # Lock all nodes in all potential target groups
11473
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11474
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11475
        member_nodes = [node_name
11476
                        for group in lock_groups
11477
                        for node_name in self.cfg.GetNodeGroup(group).members]
11478
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11479
      else:
11480
        # Lock all nodes as all groups are potential targets
11481
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11482

    
11483
  def CheckPrereq(self):
11484
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11485
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11486
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11487

    
11488
    assert (self.req_target_uuids is None or
11489
            owned_groups.issuperset(self.req_target_uuids))
11490
    assert owned_instances == set([self.op.instance_name])
11491

    
11492
    # Get instance information
11493
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11494

    
11495
    # Check if node groups for locked instance are still correct
11496
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11497
      ("Instance %s's nodes changed while we kept the lock" %
11498
       self.op.instance_name)
11499

    
11500
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11501
                                           owned_groups)
11502

    
11503
    if self.req_target_uuids:
11504
      # User requested specific target groups
11505
      self.target_uuids = frozenset(self.req_target_uuids)
11506
    else:
11507
      # All groups except those used by the instance are potential targets
11508
      self.target_uuids = owned_groups - inst_groups
11509

    
11510
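    # a group that already hosts (part of) the instance cannot be a target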
    conflicting_groups = self.target_uuids & inst_groups
11511
    if conflicting_groups:
11512
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11513
                                 " used by the instance '%s'" %
11514
                                 (utils.CommaJoin(conflicting_groups),
11515
                                  self.op.instance_name),
11516
                                 errors.ECODE_INVAL)
11517

    
11518
    if not self.target_uuids:
11519
      raise errors.OpPrereqError("There are no possible target groups",
11520
                                 errors.ECODE_INVAL)
11521

    
11522
  def BuildHooksEnv(self):
11523
    """Build hooks env.
11524

11525
    """
11526
    assert self.target_uuids
11527

    
11528
    env = {
11529
      "TARGET_GROUPS": " ".join(self.target_uuids),
11530
      }
11531

    
11532
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11533

    
11534
    return env
11535

    
11536
  def BuildHooksNodes(self):
11537
    """Build hooks nodes.
11538

11539
    """
11540
    mn = self.cfg.GetMasterNode()
11541
    return ([mn], [mn])
11542

    
11543
  def Exec(self, feedback_fn):
11544
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11545

    
11546
    assert instances == [self.op.instance_name], "Instance not locked"
11547

    
11548
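    # ask the iallocator for a plan that moves the instance to one of the
    # target groups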
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11549
                     instances=instances, target_groups=list(self.target_uuids))
11550

    
11551
    ial.Run(self.op.iallocator)
11552

    
11553
    if not ial.success:
11554
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11555
                                 " instance '%s' using iallocator '%s': %s" %
11556
                                 (self.op.instance_name, self.op.iallocator,
11557
                                  ial.info),
11558
                                 errors.ECODE_NORES)
11559

    
11560
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11561

    
11562
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11563
                 " instance '%s'", len(jobs), self.op.instance_name)
11564

    
11565
    return ResultWithJobs(jobs)
11566

    
11567

    
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
11584
    """Compute the list of all the exported system images.
11585

11586
    @rtype: dict
11587
    @return: a dictionary with the structure node->(export-list)
11588
        where export-list is a list of the instances exported on
11589
        that node.
11590

11591
    """
11592
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11593
    rpcresult = self.rpc.call_export_list(self.nodes)
11594
    result = {}
11595
    for node in rpcresult:
11596
      if rpcresult[node].fail_msg:
11597
        result[node] = False
11598
      else:
11599
        result[node] = rpcresult[node].payload
11600

    
11601
    return result
11602

    
11603

    
11604
class LUBackupPrepare(NoHooksLU):
11605
  """Prepares an instance for an export and returns useful information.
11606

11607
  """
11608
  REQ_BGL = False
11609

    
11610
  def ExpandNames(self):
11611
    self._ExpandAndLockInstance()
11612

    
11613
  def CheckPrereq(self):
11614
    """Check prerequisites.
11615

11616
    """
11617
    instance_name = self.op.instance_name
11618

    
11619
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11620
    assert self.instance is not None, \
11621
          "Cannot retrieve locked instance %s" % self.op.instance_name
11622
    _CheckNodeOnline(self, self.instance.primary_node)
11623

    
11624
    self._cds = _GetClusterDomainSecret()
11625

    
11626
  def Exec(self, feedback_fn):
11627
    """Prepares an instance for an export.
11628

11629
    """
11630
    instance = self.instance
11631

    
11632
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11633
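      # random salt, used below when signing the X509 key name and the CA
      # with the cluster domain secret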
      salt = utils.GenerateSecret(8)
11634

    
11635
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11636
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11637
                                              constants.RIE_CERT_VALIDITY)
11638
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11639

    
11640
      (name, cert_pem) = result.payload
11641

    
11642
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11643
                                             cert_pem)
11644

    
11645
      return {
11646
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11647
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11648
                          salt),
11649
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11650
        }
11651

    
11652
    return None
11653

    
11654

    
11655
class LUBackupExport(LogicalUnit):
11656
  """Export an instance to an image in the cluster.
11657

11658
  """
11659
  HPATH = "instance-export"
11660
  HTYPE = constants.HTYPE_INSTANCE
11661
  REQ_BGL = False
11662

    
11663
  def CheckArguments(self):
11664
    """Check the arguments.
11665

11666
    """
11667
    self.x509_key_name = self.op.x509_key_name
11668
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11669

    
11670
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11671
      if not self.x509_key_name:
11672
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11673
                                   errors.ECODE_INVAL)
11674

    
11675
      if not self.dest_x509_ca_pem:
11676
        raise errors.OpPrereqError("Missing destination X509 CA",
11677
                                   errors.ECODE_INVAL)
11678

    
  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    
11694
  def DeclareLocks(self, level):
11695
    """Last minute lock declaration."""
11696
    # All nodes are locked anyway, so nothing to do here.
11697

    
11698
  def BuildHooksEnv(self):
11699
    """Build hooks env.
11700

11701
    This will run on the master, primary node and target node.
11702

11703
    """
11704
    env = {
11705
      "EXPORT_MODE": self.op.mode,
11706
      "EXPORT_NODE": self.op.target_node,
11707
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11708
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11709
      # TODO: Generic function for boolean env variables
11710
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11711
      }
11712

    
11713
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11714

    
11715
    return env
11716

    
11717
  def BuildHooksNodes(self):
11718
    """Build hooks nodes.
11719

11720
    """
11721
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11722

    
11723
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11724
      nl.append(self.op.target_node)
11725

    
11726
    return (nl, nl)
11727

    
11728
  def CheckPrereq(self):
11729
    """Check prerequisites.
11730

11731
    This checks that the instance and node names are valid.
11732

11733
    """
11734
    instance_name = self.op.instance_name
11735

    
11736
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11737
    assert self.instance is not None, \
11738
          "Cannot retrieve locked instance %s" % self.op.instance_name
11739
    _CheckNodeOnline(self, self.instance.primary_node)
11740

    
11741
    if (self.op.remove_instance and self.instance.admin_up and
11742
        not self.op.shutdown):
11743
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11744
                                 " down before")
11745

    
11746
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11747
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11748
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11749
      assert self.dst_node is not None
11750

    
11751
      _CheckNodeOnline(self, self.dst_node.name)
11752
      _CheckNodeNotDrained(self, self.dst_node.name)
11753

    
11754
      self._cds = None
11755
      self.dest_disk_info = None
11756
      self.dest_x509_ca = None
11757

    
11758
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11759
      self.dst_node = None
11760

    
11761
      if len(self.op.target_node) != len(self.instance.disks):
11762
        raise errors.OpPrereqError(("Received destination information for %s"
11763
                                    " disks, but instance %s has %s disks") %
11764
                                   (len(self.op.target_node), instance_name,
11765
                                    len(self.instance.disks)),
11766
                                   errors.ECODE_INVAL)
11767

    
11768
      cds = _GetClusterDomainSecret()
11769

    
11770
      # Check X509 key name
11771
      try:
11772
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11773
      except (TypeError, ValueError), err:
11774
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11775

    
11776
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11777
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11778
                                   errors.ECODE_INVAL)
11779

    
11780
      # Load and verify CA
11781
      try:
11782
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11783
      except OpenSSL.crypto.Error, err:
11784
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11785
                                   (err, ), errors.ECODE_INVAL)
11786

    
11787
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11788
      if errcode is not None:
11789
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11790
                                   (msg, ), errors.ECODE_INVAL)
11791

    
11792
      self.dest_x509_ca = cert
11793

    
11794
      # Verify target information
11795
      disk_info = []
11796
      for idx, disk_data in enumerate(self.op.target_node):
11797
        try:
11798
          (host, port, magic) = \
11799
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11800
        except errors.GenericError, err:
11801
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11802
                                     (idx, err), errors.ECODE_INVAL)
11803

    
11804
        disk_info.append((host, port, magic))
11805

    
11806
      assert len(disk_info) == len(self.op.target_node)
11807
      self.dest_disk_info = disk_info
11808

    
11809
    else:
11810
      raise errors.ProgrammerError("Unhandled export mode %r" %
11811
                                   self.op.mode)
11812

    
11813
    # instance disk type verification
11814
    # TODO: Implement export support for file-based disks
11815
    for disk in self.instance.disks:
11816
      if disk.dev_type == constants.LD_FILE:
11817
        raise errors.OpPrereqError("Export not supported for instances with"
11818
                                   " file-based disks", errors.ECODE_INVAL)
11819

    
11820
  def _CleanupExports(self, feedback_fn):
11821
    """Removes exports of current instance from all other nodes.
11822

11823
    If an instance in a cluster with nodes A..D was exported to node C, its
11824
    exports will be removed from the nodes A, B and D.
11825

11826
    """
11827
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11828

    
11829
    nodelist = self.cfg.GetNodeList()
11830
    nodelist.remove(self.dst_node.name)
11831

    
11832
    # on one-node clusters nodelist will be empty after the removal
11833
    # if we proceed the backup would be removed because OpBackupQuery
11834
    # substitutes an empty list with the full cluster node list.
11835
    iname = self.instance.name
11836
    if nodelist:
11837
      feedback_fn("Removing old exports for instance %s" % iname)
11838
      exportlist = self.rpc.call_export_list(nodelist)
11839
      for node in exportlist:
11840
        if exportlist[node].fail_msg:
11841
          continue
11842
        if iname in exportlist[node].payload:
11843
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11844
          if msg:
11845
            self.LogWarning("Could not remove older export for instance %s"
11846
                            " on node %s: %s", iname, node, msg)
11847

    
11848
  def Exec(self, feedback_fn):
11849
    """Export an instance to an image in the cluster.
11850

11851
    """
11852
    assert self.op.mode in constants.EXPORT_MODES
11853

    
11854
    instance = self.instance
11855
    src_node = instance.primary_node
11856

    
11857
    if self.op.shutdown:
11858
      # shutdown the instance, but not the disks
11859
      feedback_fn("Shutting down instance %s" % instance.name)
11860
      result = self.rpc.call_instance_shutdown(src_node, instance,
11861
                                               self.op.shutdown_timeout)
11862
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11863
      result.Raise("Could not shutdown instance %s on"
11864
                   " node %s" % (instance.name, src_node))
11865

    
11866
    # set the disks ID correctly since call_instance_start needs the
11867
    # correct drbd minor to create the symlinks
11868
    for disk in instance.disks:
11869
      self.cfg.SetDiskID(disk, src_node)
11870

    
11871
    activate_disks = (not instance.admin_up)
11872

    
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)
11877

    
11878
    try:
11879
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11880
                                                     instance)
11881

    
11882
      helper.CreateSnapshots()
11883
      try:
11884
        if (self.op.shutdown and instance.admin_up and
11885
            not self.op.remove_instance):
11886
          assert not activate_disks
11887
          feedback_fn("Starting instance %s" % instance.name)
11888
          result = self.rpc.call_instance_start(src_node, instance,
11889
                                                None, None, False)
11890
          msg = result.fail_msg
11891
          if msg:
11892
            feedback_fn("Failed to start instance: %s" % msg)
11893
            _ShutdownInstanceDisks(self, instance)
11894
            raise errors.OpExecError("Could not start instance: %s" % msg)
11895

    
11896
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11897
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11898
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11899
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11900
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11901

    
11902
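          # only the key name is needed here; its HMAC was already verified
          # in CheckPrereq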
          (key_name, _, _) = self.x509_key_name
11903

    
11904
          dest_ca_pem = \
11905
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11906
                                            self.dest_x509_ca)
11907

    
11908
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11909
                                                     key_name, dest_ca_pem,
11910
                                                     timeouts)
11911
      finally:
11912
        helper.Cleanup()
11913

    
11914
      # Check for backwards compatibility
11915
      assert len(dresults) == len(instance.disks)
11916
      assert compat.all(isinstance(i, bool) for i in dresults), \
11917
             "Not all results are boolean: %r" % dresults
11918

    
11919
    finally:
11920
      if activate_disks:
11921
        feedback_fn("Deactivating disks for %s" % instance.name)
11922
        _ShutdownInstanceDisks(self, instance)
11923

    
11924
    if not (compat.all(dresults) and fin_resu):
11925
      failures = []
11926
      if not fin_resu:
11927
        failures.append("export finalization")
11928
      if not compat.all(dresults):
11929
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11930
                               if not dsk)
11931
        failures.append("disk export: disk(s) %s" % fdsk)
11932

    
11933
      raise errors.OpExecError("Export failed, errors in %s" %
11934
                               utils.CommaJoin(failures))
11935

    
11936
    # At this point, the export was successful, we can cleanup/finish
11937

    
11938
    # Remove instance if requested
11939
    if self.op.remove_instance:
11940
      feedback_fn("Removing instance %s" % instance.name)
11941
      _RemoveInstance(self, feedback_fn, instance,
11942
                      self.op.ignore_remove_failures)
11943

    
11944
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11945
      self._CleanupExports(feedback_fn)
11946

    
11947
    return fin_resu, dresults
11948

    
11949

    
11950
class LUBackupRemove(NoHooksLU):
11951
  """Remove exports related to the named instance.
11952

11953
  """
11954
  REQ_BGL = False
11955

    
11956
  def ExpandNames(self):
11957
    self.needed_locks = {}
11958
    # We need all nodes to be locked in order for RemoveExport to work, but we
11959
    # don't need to lock the instance itself, as nothing will happen to it (and
11960
    # we can remove exports also for a removed instance)
11961
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11962

    
11963
  def Exec(self, feedback_fn):
11964
    """Remove any export.
11965

11966
    """
11967
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11968
    # If the instance was not found we'll try with the name that was passed in.
11969
    # This will only work if it was an FQDN, though.
11970
    fqdn_warn = False
11971
    if not instance_name:
11972
      fqdn_warn = True
11973
      instance_name = self.op.instance_name
11974

    
11975
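    # ask every locked node for its exports and remove the ones belonging to
    # this instance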
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11976
    exportlist = self.rpc.call_export_list(locked_nodes)
11977
    found = False
11978
    for node in exportlist:
11979
      msg = exportlist[node].fail_msg
11980
      if msg:
11981
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11982
        continue
11983
      if instance_name in exportlist[node].payload:
11984
        found = True
11985
        result = self.rpc.call_export_remove(node, instance_name)
11986
        msg = result.fail_msg
11987
        if msg:
11988
          logging.error("Could not remove export for instance %s"
11989
                        " on node %s: %s", instance_name, node, msg)
11990

    
11991
    if fqdn_warn and not found:
11992
      feedback_fn("Export not found. If trying to remove an export belonging"
11993
                  " to a deleted instance please use its Fully Qualified"
11994
                  " Domain Name.")
11995

    
11996

    
11997
class LUGroupAdd(LogicalUnit):
11998
  """Logical unit for creating node groups.
11999

12000
  """
12001
  HPATH = "group-add"
12002
  HTYPE = constants.HTYPE_GROUP
12003
  REQ_BGL = False
12004

    
  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

    
12013
  def CheckPrereq(self):
12014
    """Check prerequisites.
12015

12016
    This checks that the given group name is not an existing node group
12017
    already.
12018

12019
    """
12020
    try:
12021
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12022
    except errors.OpPrereqError:
12023
      pass
12024
    else:
12025
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12026
                                 " node group (UUID: %s)" %
12027
                                 (self.op.group_name, existing_uuid),
12028
                                 errors.ECODE_EXISTS)
12029

    
12030
    if self.op.ndparams:
12031
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12032

    
12033
  def BuildHooksEnv(self):
12034
    """Build hooks env.
12035

12036
    """
12037
    return {
12038
      "GROUP_NAME": self.op.group_name,
12039
      }
12040

    
12041
  def BuildHooksNodes(self):
12042
    """Build hooks nodes.
12043

12044
    """
12045
    mn = self.cfg.GetMasterNode()
12046
    return ([mn], [mn])
12047

    
  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
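    # the group is now part of the configuration, so keep its lock registered
    # instead of having it dropped automatically when this LU finishes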
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12059

    
12060

    
12061
class LUGroupAssignNodes(NoHooksLU):
12062
  """Logical unit for assigning nodes to groups.
12063

12064
  """
12065
  REQ_BGL = False
12066

    
12067
  def ExpandNames(self):
12068
    # These raise errors.OpPrereqError on their own:
12069
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12070
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12071

    
12072
    # We want to lock all the affected nodes and groups. We have readily
12073
    # available the list of nodes, and the *destination* group. To gather the
12074
    # list of "source" groups, we need to fetch node information later on.
12075
    self.needed_locks = {
12076
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12077
      locking.LEVEL_NODE: self.op.nodes,
12078
      }
12079

    
12080
  def DeclareLocks(self, level):
12081
    if level == locking.LEVEL_NODEGROUP:
12082
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12083

    
12084
      # Try to get all affected nodes' groups without having the group or node
12085
      # lock yet. Needs verification later in the code flow.
12086
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12087

    
12088
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12089

    
12090
  def CheckPrereq(self):
12091
    """Check prerequisites.
12092

12093
    """
12094
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12095
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12096
            frozenset(self.op.nodes))
12097

    
12098
    expected_locks = (set([self.group_uuid]) |
12099
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12100
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12101
    if actual_locks != expected_locks:
12102
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12103
                               " current groups are '%s', used to be '%s'" %
12104
                               (utils.CommaJoin(expected_locks),
12105
                                utils.CommaJoin(actual_locks)))
12106

    
12107
    self.node_data = self.cfg.GetAllNodesInfo()
12108
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12109
    instance_data = self.cfg.GetAllInstancesInfo()
12110

    
12111
    if self.group is None:
12112
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12113
                               (self.op.group_name, self.group_uuid))
12114

    
12115
    (new_splits, previous_splits) = \
12116
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12117
                                             for node in self.op.nodes],
12118
                                            self.node_data, instance_data)
12119

    
12120
    if new_splits:
12121
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12122

    
12123
      if not self.op.force:
12124
        raise errors.OpExecError("The following instances get split by this"
12125
                                 " change and --force was not given: %s" %
12126
                                 fmt_new_splits)
12127
      else:
12128
        self.LogWarning("This operation will split the following instances: %s",
12129
                        fmt_new_splits)
12130

    
12131
        if previous_splits:
12132
          self.LogWarning("In addition, these already-split instances continue"
12133
                          " to be split across groups: %s",
12134
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12135

    
12136
  def Exec(self, feedback_fn):
12137
    """Assign nodes to a new group.
12138

12139
    """
12140
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12141

    
12142
    self.cfg.AssignGroupNodes(mods)
12143

    
12144
  @staticmethod
12145
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12146
    """Check for split instances after a node assignment.
12147

12148
    This method considers a series of node assignments as an atomic operation,
12149
    and returns information about split instances after applying the set of
12150
    changes.
12151

12152
    In particular, it returns information about newly split instances, and
12153
    instances that were already split, and remain so after the change.
12154

12155
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12156
    considered.
12157

12158
    @type changes: list of (node_name, new_group_uuid) pairs.
12159
    @param changes: list of node assignments to consider.
12160
    @param node_data: a dict with data for all nodes
12161
    @param instance_data: a dict with all instances to consider
12162
    @rtype: a two-tuple
12163
    @return: a list of instances that were previously whole and become split
12164
      as a consequence of this change, and a list of instances that were
12165
      already split and that this change does not fix.
12166

12167
    """
12168
    changed_nodes = dict((node, group) for node, group in changes
12169
                         if node_data[node].group != group)
12170

    
12171
    all_split_instances = set()
12172
    previously_split_instances = set()
12173

    
12174
    def InstanceNodes(instance):
12175
      return [instance.primary_node] + list(instance.secondary_nodes)
12176

    
12177
    for inst in instance_data.values():
12178
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12179
        continue
12180

    
12181
      instance_nodes = InstanceNodes(inst)
12182

    
12183
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12184
        previously_split_instances.add(inst.name)
12185

    
12186
      if len(set(changed_nodes.get(node, node_data[node].group)
12187
                 for node in instance_nodes)) > 1:
12188
        all_split_instances.add(inst.name)
12189

    
12190
    return (list(all_split_instances - previously_split_instances),
12191
            list(previously_split_instances & all_split_instances))
12192

    
12193

    
12194
class _GroupQuery(_QueryBase):
12195
  FIELDS = query.GROUP_FIELDS
12196

    
12197
  def ExpandNames(self, lu):
12198
    lu.needed_locks = {}
12199

    
12200
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12201
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12202

    
12203
    if not self.names:
12204
      self.wanted = [name_to_uuid[name]
12205
                     for name in utils.NiceSort(name_to_uuid.keys())]
12206
    else:
12207
      # Accept the given names as either group names or UUIDs.
12208
      missing = []
12209
      self.wanted = []
12210
      all_uuid = frozenset(self._all_groups.keys())
12211

    
12212
      for name in self.names:
12213
        if name in all_uuid:
12214
          self.wanted.append(name)
12215
        elif name in name_to_uuid:
12216
          self.wanted.append(name_to_uuid[name])
12217
        else:
12218
          missing.append(name)
12219

    
12220
      if missing:
12221
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12222
                                   utils.CommaJoin(missing),
12223
                                   errors.ECODE_NOENT)
12224

    
12225
  def DeclareLocks(self, lu, level):
12226
    pass
12227

    
12228
  def _GetQueryData(self, lu):
12229
    """Computes the list of node groups and their attributes.
12230

12231
    """
12232
    do_nodes = query.GQ_NODE in self.requested_data
12233
    do_instances = query.GQ_INST in self.requested_data
12234

    
12235
    group_to_nodes = None
12236
    group_to_instances = None
12237

    
12238
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12239
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12240
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12241
    # instance->node. Hence, we will need to process nodes even if we only need
12242
    # instance information.
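    # For example (hypothetical data), a group with UUID "uuid-1" containing
    # node "node1" which runs instance "inst1" ends up as
    #   group_to_nodes = {"uuid-1": ["node1"]}
    #   group_to_instances = {"uuid-1": ["inst1"]}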
12243
    if do_nodes or do_instances:
12244
      all_nodes = lu.cfg.GetAllNodesInfo()
12245
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12246
      node_to_group = {}
12247

    
12248
      for node in all_nodes.values():
12249
        if node.group in group_to_nodes:
12250
          group_to_nodes[node.group].append(node.name)
12251
          node_to_group[node.name] = node.group
12252

    
12253
      if do_instances:
12254
        all_instances = lu.cfg.GetAllInstancesInfo()
12255
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12256

    
12257
        for instance in all_instances.values():
12258
          node = instance.primary_node
12259
          if node in node_to_group:
12260
            group_to_instances[node_to_group[node]].append(instance.name)
12261

    
12262
        if not do_nodes:
12263
          # Do not pass on node information if it was not requested.
12264
          group_to_nodes = None
12265

    
12266
    return query.GroupQueryData([self._all_groups[uuid]
12267
                                 for uuid in self.wanted],
12268
                                group_to_nodes, group_to_instances)
12269

    
12270

    
12271
class LUGroupQuery(NoHooksLU):
12272
  """Logical unit for querying node groups.
12273

12274
  """
12275
  REQ_BGL = False
12276

    
12277
  def CheckArguments(self):
12278
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12279
                          self.op.output_fields, False)
12280

    
12281
  def ExpandNames(self):
12282
    self.gq.ExpandNames(self)
12283

    
12284
  def DeclareLocks(self, level):
12285
    self.gq.DeclareLocks(self, level)
12286

    
12287
  def Exec(self, feedback_fn):
12288
    return self.gq.OldStyleQuery(self)
12289

    
12290

    
12291
class LUGroupSetParams(LogicalUnit):
12292
  """Modifies the parameters of a node group.
12293

12294
  """
12295
  HPATH = "group-modify"
12296
  HTYPE = constants.HTYPE_GROUP
12297
  REQ_BGL = False
12298

    
12299
  def CheckArguments(self):
12300
    all_changes = [
12301
      self.op.ndparams,
12302
      self.op.alloc_policy,
12303
      ]
12304

    
12305
    if all_changes.count(None) == len(all_changes):
12306
      raise errors.OpPrereqError("Please pass at least one modification",
12307
                                 errors.ECODE_INVAL)
12308

    
12309
  def ExpandNames(self):
12310
    # This raises errors.OpPrereqError on its own:
12311
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12312

    
12313
    self.needed_locks = {
12314
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12315
      }
12316

    
12317
  def CheckPrereq(self):
12318
    """Check prerequisites.
12319

12320
    """
12321
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12322

    
12323
    if self.group is None:
12324
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12325
                               (self.op.group_name, self.group_uuid))
12326

    
12327
    if self.op.ndparams:
12328
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12329
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12330
      self.new_ndparams = new_ndparams
12331

    
12332
  def BuildHooksEnv(self):
12333
    """Build hooks env.
12334

12335
    """
12336
    return {
12337
      "GROUP_NAME": self.op.group_name,
12338
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12339
      }
12340

    
12341
  def BuildHooksNodes(self):
12342
    """Build hooks nodes.
12343

12344
    """
12345
    mn = self.cfg.GetMasterNode()
12346
    return ([mn], [mn])
12347

    
12348
  def Exec(self, feedback_fn):
12349
    """Modifies the node group.
12350

12351
    """
12352
    result = []
12353

    
12354
    if self.op.ndparams:
12355
      self.group.ndparams = self.new_ndparams
12356
      result.append(("ndparams", str(self.group.ndparams)))
12357

    
12358
    if self.op.alloc_policy:
12359
      self.group.alloc_policy = self.op.alloc_policy
12360

    
12361
    self.cfg.Update(self.group, feedback_fn)
12362
    return result
12363

    
12364

    
12365
class LUGroupRemove(LogicalUnit):
12366
  HPATH = "group-remove"
12367
  HTYPE = constants.HTYPE_GROUP
12368
  REQ_BGL = False
12369

    
12370
  def ExpandNames(self):
12371
    # This will raise errors.OpPrereqError on its own:
12372
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12373
    self.needed_locks = {
12374
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12375
      }
12376

    
12377
  def CheckPrereq(self):
12378
    """Check prerequisites.
12379

12380
    This checks that the given group name exists as a node group, that it is
12381
    empty (i.e., contains no nodes), and that it is not the last group of the
12382
    cluster.
12383

12384
    """
12385
    # Verify that the group is empty.
12386
    group_nodes = [node.name
12387
                   for node in self.cfg.GetAllNodesInfo().values()
12388
                   if node.group == self.group_uuid]
12389

    
12390
    if group_nodes:
12391
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12392
                                 " nodes: %s" %
12393
                                 (self.op.group_name,
12394
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12395
                                 errors.ECODE_STATE)
12396

    
12397
    # Verify the cluster would not be left group-less.
12398
    if len(self.cfg.GetNodeGroupList()) == 1:
12399
      raise errors.OpPrereqError("Group '%s' is the only group,"
12400
                                 " cannot be removed" %
12401
                                 self.op.group_name,
12402
                                 errors.ECODE_STATE)
12403

    
12404
  def BuildHooksEnv(self):
12405
    """Build hooks env.
12406

12407
    """
12408
    return {
12409
      "GROUP_NAME": self.op.group_name,
12410
      }
12411

    
12412
  def BuildHooksNodes(self):
12413
    """Build hooks nodes.
12414

12415
    """
12416
    mn = self.cfg.GetMasterNode()
12417
    return ([mn], [mn])
12418

    
12419
  def Exec(self, feedback_fn):
12420
    """Remove the node group.
12421

12422
    """
12423
    try:
12424
      self.cfg.RemoveNodeGroup(self.group_uuid)
12425
    except errors.ConfigurationError:
12426
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12427
                               (self.op.group_name, self.group_uuid))
12428

    
12429
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12430

    
12431

    
12432
class LUGroupRename(LogicalUnit):
12433
  HPATH = "group-rename"
12434
  HTYPE = constants.HTYPE_GROUP
12435
  REQ_BGL = False
12436

    
12437
  def ExpandNames(self):
12438
    # This raises errors.OpPrereqError on its own:
12439
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12440

    
12441
    self.needed_locks = {
12442
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12443
      }
12444

    
12445
  def CheckPrereq(self):
12446
    """Check prerequisites.
12447

12448
    Ensures requested new name is not yet used.
12449

12450
    """
12451
    try:
12452
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12453
    except errors.OpPrereqError:
12454
      pass
12455
    else:
12456
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12457
                                 " node group (UUID: %s)" %
12458
                                 (self.op.new_name, new_name_uuid),
12459
                                 errors.ECODE_EXISTS)
12460

    
12461
  def BuildHooksEnv(self):
12462
    """Build hooks env.
12463

12464
    """
12465
    return {
12466
      "OLD_NAME": self.op.group_name,
12467
      "NEW_NAME": self.op.new_name,
12468
      }
12469

    
12470
  def BuildHooksNodes(self):
12471
    """Build hooks nodes.
12472

12473
    """
12474
    mn = self.cfg.GetMasterNode()
12475

    
12476
    all_nodes = self.cfg.GetAllNodesInfo()
12477
    all_nodes.pop(mn, None)
12478

    
12479
    run_nodes = [mn]
12480
    run_nodes.extend(node.name for node in all_nodes.values()
12481
                     if node.group == self.group_uuid)
12482

    
12483
    return (run_nodes, run_nodes)
12484

    
12485
  def Exec(self, feedback_fn):
12486
    """Rename the node group.
12487

12488
    """
12489
    group = self.cfg.GetNodeGroup(self.group_uuid)
12490

    
12491
    if group is None:
12492
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12493
                               (self.op.group_name, self.group_uuid))
12494

    
12495
    group.name = self.op.new_name
12496
    self.cfg.Update(group, feedback_fn)
12497

    
12498
    return self.op.new_name
12499

    
12500

    
12501
class LUGroupEvacuate(LogicalUnit):
12502
  HPATH = "group-evacuate"
12503
  HTYPE = constants.HTYPE_GROUP
12504
  REQ_BGL = False
12505

    
12506
  def ExpandNames(self):
12507
    # This raises errors.OpPrereqError on its own:
12508
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12509

    
12510
    if self.op.target_groups:
12511
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12512
                                  self.op.target_groups)
12513
    else:
12514
      self.req_target_uuids = []
12515

    
12516
    if self.group_uuid in self.req_target_uuids:
12517
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12518
                                 " as a target group (targets are %s)" %
12519
                                 (self.group_uuid,
12520
                                  utils.CommaJoin(self.req_target_uuids)),
12521
                                 errors.ECODE_INVAL)
12522

    
12523
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12524

    
12525
    self.share_locks = _ShareAll()
12526
    self.needed_locks = {
12527
      locking.LEVEL_INSTANCE: [],
12528
      locking.LEVEL_NODEGROUP: [],
12529
      locking.LEVEL_NODE: [],
12530
      }
12531

    
12532
  def DeclareLocks(self, level):
12533
    if level == locking.LEVEL_INSTANCE:
12534
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12535

    
12536
      # Lock instances optimistically, needs verification once node and group
12537
      # locks have been acquired
12538
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12539
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12540

    
12541
    elif level == locking.LEVEL_NODEGROUP:
12542
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12543

    
12544
      if self.req_target_uuids:
12545
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12546

    
12547
        # Lock all groups used by instances optimistically; this requires going
12548
        # via the node before it's locked, requiring verification later on
12549
        lock_groups.update(group_uuid
12550
                           for instance_name in
12551
                             self.owned_locks(locking.LEVEL_INSTANCE)
12552
                           for group_uuid in
12553
                             self.cfg.GetInstanceNodeGroups(instance_name))
12554
      else:
12555
        # No target groups, need to lock all of them
12556
        lock_groups = locking.ALL_SET
12557

    
12558
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12559

    
12560
    elif level == locking.LEVEL_NODE:
12561
      # This will only lock the nodes in the group to be evacuated which
12562
      # contain actual instances
12563
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12564
      self._LockInstancesNodes()
12565

    
12566
      # Lock all nodes in group to be evacuated and target groups
12567
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12568
      assert self.group_uuid in owned_groups
12569
      member_nodes = [node_name
12570
                      for group in owned_groups
12571
                      for node_name in self.cfg.GetNodeGroup(group).members]
12572
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12573

    
12574
  def CheckPrereq(self):
12575
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12576
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12577
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12578

    
12579
    assert owned_groups.issuperset(self.req_target_uuids)
12580
    assert self.group_uuid in owned_groups
12581

    
12582
    # Check if locked instances are still correct
12583
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12584

    
12585
    # Get instance information
12586
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12587

    
12588
    # Check if node groups for locked instances are still correct
12589
    _CheckInstancesNodeGroups(self.cfg, self.instances,
12590
                              owned_groups, owned_nodes, self.group_uuid)
12591

    
12592
    if self.req_target_uuids:
12593
      # User requested specific target groups
12594
      self.target_uuids = self.req_target_uuids
12595
    else:
12596
      # All groups except the one to be evacuated are potential targets
12597
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12598
                           if group_uuid != self.group_uuid]
12599

    
12600
      if not self.target_uuids:
12601
        raise errors.OpPrereqError("There are no possible target groups",
12602
                                   errors.ECODE_INVAL)
12603

    
12604
  def BuildHooksEnv(self):
12605
    """Build hooks env.
12606

12607
    """
12608
    return {
12609
      "GROUP_NAME": self.op.group_name,
12610
      "TARGET_GROUPS": " ".join(self.target_uuids),
12611
      }
12612

    
12613
  def BuildHooksNodes(self):
12614
    """Build hooks nodes.
12615

12616
    """
12617
    mn = self.cfg.GetMasterNode()
12618

    
12619
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12620

    
12621
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12622

    
12623
    return (run_nodes, run_nodes)
12624

    
12625
  def Exec(self, feedback_fn):
12626
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12627

    
12628
    assert self.group_uuid not in self.target_uuids
12629

    
12630
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12631
                     instances=instances, target_groups=self.target_uuids)
12632

    
12633
    ial.Run(self.op.iallocator)
12634

    
12635
    if not ial.success:
12636
      raise errors.OpPrereqError("Can't compute group evacuation using"
12637
                                 " iallocator '%s': %s" %
12638
                                 (self.op.iallocator, ial.info),
12639
                                 errors.ECODE_NORES)
12640

    
12641
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12642

    
12643
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12644
                 len(jobs), self.op.group_name)
12645

    
12646
    return ResultWithJobs(jobs)
12647

    
12648

    
12649
class TagsLU(NoHooksLU): # pylint: disable=W0223
12650
  """Generic tags LU.
12651

12652
  This is an abstract class which is the parent of all the other tags LUs.
12653

12654
  """
12655
  def ExpandNames(self):
12656
    self.group_uuid = None
12657
    self.needed_locks = {}
12658
    if self.op.kind == constants.TAG_NODE:
12659
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12660
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12661
    elif self.op.kind == constants.TAG_INSTANCE:
12662
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12663
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12664
    elif self.op.kind == constants.TAG_NODEGROUP:
12665
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12666

    
12667
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12668
    # not possible to acquire the BGL based on opcode parameters)
12669

    
12670
  def CheckPrereq(self):
12671
    """Check prerequisites.
12672

12673
    """
12674
    if self.op.kind == constants.TAG_CLUSTER:
12675
      self.target = self.cfg.GetClusterInfo()
12676
    elif self.op.kind == constants.TAG_NODE:
12677
      self.target = self.cfg.GetNodeInfo(self.op.name)
12678
    elif self.op.kind == constants.TAG_INSTANCE:
12679
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12680
    elif self.op.kind == constants.TAG_NODEGROUP:
12681
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12682
    else:
12683
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12684
                                 str(self.op.kind), errors.ECODE_INVAL)
12685

    
12686

    
12687
class LUTagsGet(TagsLU):
12688
  """Returns the tags of a given object.
12689

12690
  """
12691
  REQ_BGL = False
12692

    
12693
  def ExpandNames(self):
12694
    TagsLU.ExpandNames(self)
12695

    
12696
    # Share locks as this is only a read operation
12697
    self.share_locks = _ShareAll()
12698

    
12699
  def Exec(self, feedback_fn):
12700
    """Returns the tag list.
12701

12702
    """
12703
    return list(self.target.GetTags())
12704

    
12705

    
12706
class LUTagsSearch(NoHooksLU):
12707
  """Searches the tags for a given pattern.
12708

12709
  """
12710
  REQ_BGL = False
12711

    
12712
  def ExpandNames(self):
12713
    self.needed_locks = {}
12714

    
12715
  def CheckPrereq(self):
12716
    """Check prerequisites.
12717

12718
    This checks the pattern passed for validity by compiling it.
12719

12720
    """
12721
    try:
12722
      self.re = re.compile(self.op.pattern)
12723
    except re.error, err:
12724
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12725
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12726

    
12727
  def Exec(self, feedback_fn):
12728
    """Returns the tag list.
12729

12730
    """
12731
    cfg = self.cfg
12732
    tgts = [("/cluster", cfg.GetClusterInfo())]
12733
    ilist = cfg.GetAllInstancesInfo().values()
12734
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12735
    nlist = cfg.GetAllNodesInfo().values()
12736
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12737
    tgts.extend(("/nodegroup/%s" % n.name, n)
12738
                for n in cfg.GetAllNodeGroupsInfo().values())
12739
    results = []
12740
    for path, target in tgts:
12741
      for tag in target.GetTags():
12742
        if self.re.search(tag):
12743
          results.append((path, tag))
12744
    return results
12745

    
12746

    
12747
class LUTagsSet(TagsLU):
12748
  """Sets a tag on a given object.
12749

12750
  """
12751
  REQ_BGL = False
12752

    
12753
  def CheckPrereq(self):
12754
    """Check prerequisites.
12755

12756
    This checks the type and length of the tag name and value.
12757

12758
    """
12759
    TagsLU.CheckPrereq(self)
12760
    for tag in self.op.tags:
12761
      objects.TaggableObject.ValidateTag(tag)
12762

    
12763
  def Exec(self, feedback_fn):
12764
    """Sets the tag.
12765

12766
    """
12767
    try:
12768
      for tag in self.op.tags:
12769
        self.target.AddTag(tag)
12770
    except errors.TagError, err:
12771
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12772
    self.cfg.Update(self.target, feedback_fn)
12773

    
12774

    
12775
class LUTagsDel(TagsLU):
12776
  """Delete a list of tags from a given object.
12777

12778
  """
12779
  REQ_BGL = False
12780

    
12781
  def CheckPrereq(self):
12782
    """Check prerequisites.
12783

12784
    This checks that we have the given tag.
12785

12786
    """
12787
    TagsLU.CheckPrereq(self)
12788
    for tag in self.op.tags:
12789
      objects.TaggableObject.ValidateTag(tag)
12790
    del_tags = frozenset(self.op.tags)
12791
    cur_tags = self.target.GetTags()
12792

    
12793
    diff_tags = del_tags - cur_tags
12794
    if diff_tags:
12795
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12796
      raise errors.OpPrereqError("Tag(s) %s not found" %
12797
                                 (utils.CommaJoin(diff_names), ),
12798
                                 errors.ECODE_NOENT)
12799

    
12800
  def Exec(self, feedback_fn):
12801
    """Remove the tag from the object.
12802

12803
    """
12804
    for tag in self.op.tags:
12805
      self.target.RemoveTag(tag)
12806
    self.cfg.Update(self.target, feedback_fn)
12807

    
12808

    
12809
class LUTestDelay(NoHooksLU):
12810
  """Sleep for a specified amount of time.
12811

12812
  This LU sleeps on the master and/or nodes for a specified amount of
12813
  time.
12814

12815
  """
12816
  REQ_BGL = False
12817

    
12818
  def ExpandNames(self):
12819
    """Expand names and set required locks.
12820

12821
    This expands the node list, if any.
12822

12823
    """
12824
    self.needed_locks = {}
12825
    if self.op.on_nodes:
12826
      # _GetWantedNodes can be used here, but is not always appropriate to use
12827
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12828
      # more information.
12829
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12830
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12831

    
12832
  def _TestDelay(self):
12833
    """Do the actual sleep.
12834

12835
    """
12836
    if self.op.on_master:
12837
      if not utils.TestDelay(self.op.duration):
12838
        raise errors.OpExecError("Error during master delay test")
12839
    if self.op.on_nodes:
12840
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12841
      for node, node_result in result.items():
12842
        node_result.Raise("Failure during rpc call to node %s" % node)
12843

    
12844
  def Exec(self, feedback_fn):
12845
    """Execute the test delay opcode, with the wanted repetitions.
12846

12847
    """
12848
    if self.op.repeat == 0:
12849
      self._TestDelay()
12850
    else:
12851
      top_value = self.op.repeat - 1
12852
      for i in range(self.op.repeat):
12853
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12854
        self._TestDelay()
12855

    
12856

    
12857
class LUTestJqueue(NoHooksLU):
12858
  """Utility LU to test some aspects of the job queue.
12859

12860
  """
12861
  REQ_BGL = False
12862

    
12863
  # Must be lower than default timeout for WaitForJobChange to see whether it
12864
  # notices changed jobs
12865
  _CLIENT_CONNECT_TIMEOUT = 20.0
12866
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12867

    
12868
  @classmethod
12869
  def _NotifyUsingSocket(cls, cb, errcls):
12870
    """Opens a Unix socket and waits for another program to connect.
12871

12872
    @type cb: callable
12873
    @param cb: Callback to send socket name to client
12874
    @type errcls: class
12875
    @param errcls: Exception class to use for errors
12876

12877
    """
12878
    # Using a temporary directory as there's no easy way to create temporary
12879
    # sockets without writing a custom loop around tempfile.mktemp and
12880
    # socket.bind
12881
    tmpdir = tempfile.mkdtemp()
12882
    try:
12883
      tmpsock = utils.PathJoin(tmpdir, "sock")
12884

    
12885
      logging.debug("Creating temporary socket at %s", tmpsock)
12886
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12887
      try:
12888
        sock.bind(tmpsock)
12889
        sock.listen(1)
12890

    
12891
        # Send details to client
12892
        cb(tmpsock)
12893

    
12894
        # Wait for client to connect before continuing
12895
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12896
        try:
12897
          (conn, _) = sock.accept()
12898
        except socket.error, err:
12899
          raise errcls("Client didn't connect in time (%s)" % err)
12900
      finally:
12901
        sock.close()
12902
    finally:
12903
      # Remove as soon as client is connected
12904
      shutil.rmtree(tmpdir)
12905

    
12906
    # Wait for client to close
12907
    try:
12908
      try:
12909
        # pylint: disable=E1101
12910
        # Instance of '_socketobject' has no ... member
12911
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12912
        conn.recv(1)
12913
      except socket.error, err:
12914
        raise errcls("Client failed to confirm notification (%s)" % err)
12915
    finally:
12916
      conn.close()
12917

    
12918
  def _SendNotification(self, test, arg, sockname):
12919
    """Sends a notification to the client.
12920

12921
    @type test: string
12922
    @param test: Test name
12923
    @param arg: Test argument (depends on test)
12924
    @type sockname: string
12925
    @param sockname: Socket path
12926

12927
    """
12928
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12929

    
12930
  def _Notify(self, prereq, test, arg):
12931
    """Notifies the client of a test.
12932

12933
    @type prereq: bool
12934
    @param prereq: Whether this is a prereq-phase test
12935
    @type test: string
12936
    @param test: Test name
12937
    @param arg: Test argument (depends on test)
12938

12939
    """
12940
    if prereq:
12941
      errcls = errors.OpPrereqError
12942
    else:
12943
      errcls = errors.OpExecError
12944

    
12945
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12946
                                                  test, arg),
12947
                                   errcls)
12948

    
12949
  def CheckArguments(self):
12950
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12951
    self.expandnames_calls = 0
12952

    
12953
  def ExpandNames(self):
12954
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12955
    if checkargs_calls < 1:
12956
      raise errors.ProgrammerError("CheckArguments was not called")
12957

    
12958
    self.expandnames_calls += 1
12959

    
12960
    if self.op.notify_waitlock:
12961
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12962

    
12963
    self.LogInfo("Expanding names")
12964

    
12965
    # Get lock on master node (just to get a lock, not for a particular reason)
12966
    self.needed_locks = {
12967
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12968
      }
12969

    
12970
  def Exec(self, feedback_fn):
12971
    if self.expandnames_calls < 1:
12972
      raise errors.ProgrammerError("ExpandNames was not called")
12973

    
12974
    if self.op.notify_exec:
12975
      self._Notify(False, constants.JQT_EXEC, None)
12976

    
12977
    self.LogInfo("Executing")
12978

    
12979
    if self.op.log_messages:
12980
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12981
      for idx, msg in enumerate(self.op.log_messages):
12982
        self.LogInfo("Sending log message %s", idx + 1)
12983
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12984
        # Report how many test messages have been sent
12985
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12986

    
12987
    if self.op.fail:
12988
      raise errors.OpExecError("Opcode failure was requested")
12989

    
12990
    return True
12991

    
12992

    
12993
class IAllocator(object):
12994
  """IAllocator framework.
12995

12996
  An IAllocator instance has four sets of attributes:
12997
    - cfg that is needed to query the cluster
12998
    - input data (all members of the _KEYS class attribute are required)
12999
    - four buffer attributes (in|out)_(data|text), that represent the
13000
      input (to the external script) in text and data structure format,
13001
      and the output from it, again in two formats
13002
    - the result variables from the script (success, info, result) for
13003
      easy usage
13004

13005
  """
13006
  # pylint: disable=R0902
13007
  # lots of instance attributes
13008

    
13009
  def __init__(self, cfg, rpc, mode, **kwargs):
13010
    self.cfg = cfg
13011
    self.rpc = rpc
13012
    # init buffer variables
13013
    self.in_text = self.out_text = self.in_data = self.out_data = None
13014
    # init all input fields so that pylint is happy
13015
    self.mode = mode
13016
    self.memory = self.disks = self.disk_template = None
13017
    self.os = self.tags = self.nics = self.vcpus = None
13018
    self.hypervisor = None
13019
    self.relocate_from = None
13020
    self.name = None
13021
    self.instances = None
13022
    self.evac_mode = None
13023
    self.target_groups = []
13024
    # computed fields
13025
    self.required_nodes = None
13026
    # init result fields
13027
    self.success = self.info = self.result = None
13028

    
13029
    try:
13030
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13031
    except KeyError:
13032
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13033
                                   " IAllocator" % self.mode)
13034

    
13035
    keyset = [n for (n, _) in keydata]
13036

    
13037
    for key in kwargs:
13038
      if key not in keyset:
13039
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13040
                                     " IAllocator" % key)
13041
      setattr(self, key, kwargs[key])
13042

    
13043
    for key in keyset:
13044
      if key not in kwargs:
13045
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13046
                                     " IAllocator" % key)
13047
    self._BuildInputData(compat.partial(fn, self), keydata)
13048

    
13049
  def _ComputeClusterData(self):
13050
    """Compute the generic allocator input data.
13051

13052
    This is the data that is independent of the actual operation.
13053

13054
    """
13055
    cfg = self.cfg
13056
    cluster_info = cfg.GetClusterInfo()
13057
    # cluster data
13058
    data = {
13059
      "version": constants.IALLOCATOR_VERSION,
13060
      "cluster_name": cfg.GetClusterName(),
13061
      "cluster_tags": list(cluster_info.GetTags()),
13062
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13063
      # we don't have job IDs
13064
      }
13065
    ninfo = cfg.GetAllNodesInfo()
13066
    iinfo = cfg.GetAllInstancesInfo().values()
13067
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13068

    
13069
    # node data
13070
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13071

    
13072
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13073
      hypervisor_name = self.hypervisor
13074
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13075
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13076
    else:
13077
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13078

    
13079
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13080
                                        hypervisor_name)
13081
    node_iinfo = \
13082
      self.rpc.call_all_instances_info(node_list,
13083
                                       cluster_info.enabled_hypervisors)
13084

    
13085
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13086

    
13087
    config_ndata = self._ComputeBasicNodeData(ninfo)
13088
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13089
                                                 i_list, config_ndata)
13090
    assert len(data["nodes"]) == len(ninfo), \
13091
        "Incomplete node data computed"
13092

    
13093
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13094

    
13095
    self.in_data = data
13096

    
13097
  @staticmethod
13098
  def _ComputeNodeGroupData(cfg):
13099
    """Compute node groups data.
13100

13101
    """
13102
    ng = dict((guuid, {
13103
      "name": gdata.name,
13104
      "alloc_policy": gdata.alloc_policy,
13105
      })
13106
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13107

    
13108
    return ng
13109

    
13110
  @staticmethod
13111
  def _ComputeBasicNodeData(node_cfg):
13112
    """Compute global node data.
13113

13114
    @rtype: dict
13115
    @return: a dict mapping each node name to a dict of its static attributes
13116

13117
    """
13118
    # fill in static (config-based) values
13119
    node_results = dict((ninfo.name, {
13120
      "tags": list(ninfo.GetTags()),
13121
      "primary_ip": ninfo.primary_ip,
13122
      "secondary_ip": ninfo.secondary_ip,
13123
      "offline": ninfo.offline,
13124
      "drained": ninfo.drained,
13125
      "master_candidate": ninfo.master_candidate,
13126
      "group": ninfo.group,
13127
      "master_capable": ninfo.master_capable,
13128
      "vm_capable": ninfo.vm_capable,
13129
      })
13130
      for ninfo in node_cfg.values())
13131

    
13132
    return node_results
13133

    
13134
  @staticmethod
13135
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13136
                              node_results):
13137
    """Compute global node data.
13138

13139
    @param node_results: the basic node structures as filled from the config
13140

13141
    """
13142
    # make a copy of the current dict
13143
    node_results = dict(node_results)
13144
    for nname, nresult in node_data.items():
13145
      assert nname in node_results, "Missing basic data for node %s" % nname
13146
      ninfo = node_cfg[nname]
13147

    
13148
      if not (ninfo.offline or ninfo.drained):
13149
        nresult.Raise("Can't get data for node %s" % nname)
13150
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13151
                                nname)
13152
        remote_info = nresult.payload
13153

    
13154
        for attr in ["memory_total", "memory_free", "memory_dom0",
13155
                     "vg_size", "vg_free", "cpu_total"]:
13156
          if attr not in remote_info:
13157
            raise errors.OpExecError("Node '%s' didn't return attribute"
13158
                                     " '%s'" % (nname, attr))
13159
          if not isinstance(remote_info[attr], int):
13160
            raise errors.OpExecError("Node '%s' returned invalid value"
13161
                                     " for '%s': %s" %
13162
                                     (nname, attr, remote_info[attr]))
13163
        # compute memory used by primary instances
13164
        i_p_mem = i_p_up_mem = 0
13165
        for iinfo, beinfo in i_list:
13166
          if iinfo.primary_node == nname:
13167
            i_p_mem += beinfo[constants.BE_MEMORY]
13168
            if iinfo.name not in node_iinfo[nname].payload:
13169
              i_used_mem = 0
13170
            else:
13171
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13172
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13173
            remote_info["memory_free"] -= max(0, i_mem_diff)
13174

    
13175
            if iinfo.admin_up:
13176
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13177

    
13178
        # compute memory used by instances
13179
        pnr_dyn = {
13180
          "total_memory": remote_info["memory_total"],
13181
          "reserved_memory": remote_info["memory_dom0"],
13182
          "free_memory": remote_info["memory_free"],
13183
          "total_disk": remote_info["vg_size"],
13184
          "free_disk": remote_info["vg_free"],
13185
          "total_cpus": remote_info["cpu_total"],
13186
          "i_pri_memory": i_p_mem,
13187
          "i_pri_up_memory": i_p_up_mem,
13188
          }
13189
        pnr_dyn.update(node_results[nname])
13190
        node_results[nname] = pnr_dyn
13191

    
13192
    return node_results
13193

    
13194
  @staticmethod
13195
  def _ComputeInstanceData(cluster_info, i_list):
13196
    """Compute global instance data.
13197

13198
    """
13199
    instance_data = {}
13200
    for iinfo, beinfo in i_list:
13201
      nic_data = []
13202
      for nic in iinfo.nics:
13203
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13204
        nic_dict = {
13205
          "mac": nic.mac,
13206
          "ip": nic.ip,
13207
          "mode": filled_params[constants.NIC_MODE],
13208
          "link": filled_params[constants.NIC_LINK],
13209
          }
13210
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13211
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13212
        nic_data.append(nic_dict)
13213
      pir = {
13214
        "tags": list(iinfo.GetTags()),
13215
        "admin_up": iinfo.admin_up,
13216
        "vcpus": beinfo[constants.BE_VCPUS],
13217
        "memory": beinfo[constants.BE_MEMORY],
13218
        "os": iinfo.os,
13219
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13220
        "nics": nic_data,
13221
        "disks": [{constants.IDISK_SIZE: dsk.size,
13222
                   constants.IDISK_MODE: dsk.mode}
13223
                  for dsk in iinfo.disks],
13224
        "disk_template": iinfo.disk_template,
13225
        "hypervisor": iinfo.hypervisor,
13226
        }
13227
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13228
                                                 pir["disks"])
13229
      instance_data[iinfo.name] = pir
13230

    
13231
    return instance_data
13232

    
13233
  def _AddNewInstance(self):
13234
    """Add new instance data to allocator structure.
13235

13236
    This in combination with _ComputeClusterData will create the
13237
    correct structure needed as input for the allocator.
13238

13239
    The checks for the completeness of the opcode must have already been
13240
    done.
13241

13242
    """
13243
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13244

    
13245
    if self.disk_template in constants.DTS_INT_MIRROR:
13246
      self.required_nodes = 2
13247
    else:
13248
      self.required_nodes = 1
13249

    
13250
    request = {
13251
      "name": self.name,
13252
      "disk_template": self.disk_template,
13253
      "tags": self.tags,
13254
      "os": self.os,
13255
      "vcpus": self.vcpus,
13256
      "memory": self.memory,
13257
      "disks": self.disks,
13258
      "disk_space_total": disk_space,
13259
      "nics": self.nics,
13260
      "required_nodes": self.required_nodes,
13261
      "hypervisor": self.hypervisor,
13262
      }
13263

    
13264
    return request
13265

    
13266
  def _AddRelocateInstance(self):
13267
    """Add relocate instance data to allocator structure.
13268

13269
    This in combination with _ComputeClusterData will create the
13270
    correct structure needed as input for the allocator.
13271

13272
    The checks for the completeness of the opcode must have already been
13273
    done.
13274

13275
    """
13276
    instance = self.cfg.GetInstanceInfo(self.name)
13277
    if instance is None:
13278
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13279
                                   " IAllocator" % self.name)
13280

    
13281
    if instance.disk_template not in constants.DTS_MIRRORED:
13282
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13283
                                 errors.ECODE_INVAL)
13284

    
13285
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13286
        len(instance.secondary_nodes) != 1:
13287
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13288
                                 errors.ECODE_STATE)
13289

    
13290
    self.required_nodes = 1
13291
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13292
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13293

    
13294
    request = {
13295
      "name": self.name,
13296
      "disk_space_total": disk_space,
13297
      "required_nodes": self.required_nodes,
13298
      "relocate_from": self.relocate_from,
13299
      }
13300
    return request
13301

    
13302
  def _AddNodeEvacuate(self):
13303
    """Get data for node-evacuate requests.
13304

13305
    """
13306
    return {
13307
      "instances": self.instances,
13308
      "evac_mode": self.evac_mode,
13309
      }
13310

    
13311
  def _AddChangeGroup(self):
13312
    """Get data for node-evacuate requests.
13313

13314
    """
13315
    return {
13316
      "instances": self.instances,
13317
      "target_groups": self.target_groups,
13318
      }
13319

    
13320
  def _BuildInputData(self, fn, keydata):
13321
    """Build input data structures.
13322

13323
    """
13324
    self._ComputeClusterData()
13325

    
13326
    request = fn()
13327
    request["type"] = self.mode
13328
    for keyname, keytype in keydata:
13329
      if keyname not in request:
13330
        raise errors.ProgrammerError("Request parameter %s is missing" %
13331
                                     keyname)
13332
      val = request[keyname]
13333
      if not keytype(val):
13334
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13335
                                     " validation, value %s, expected"
13336
                                     " type %s" % (keyname, val, keytype))
13337
    self.in_data["request"] = request
13338

    
13339
    self.in_text = serializer.Dump(self.in_data)
13340

    
13341
  _STRING_LIST = ht.TListOf(ht.TString)
13342
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13343
     # pylint: disable=E1101
13344
     # Class '...' has no 'OP_ID' member
13345
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13346
                          opcodes.OpInstanceMigrate.OP_ID,
13347
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13348
     })))
13349

    
13350
  _NEVAC_MOVED = \
13351
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13352
                       ht.TItems([ht.TNonEmptyString,
13353
                                  ht.TNonEmptyString,
13354
                                  ht.TListOf(ht.TNonEmptyString),
13355
                                 ])))
13356
  _NEVAC_FAILED = \
13357
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13358
                       ht.TItems([ht.TNonEmptyString,
13359
                                  ht.TMaybeString,
13360
                                 ])))
13361
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13362
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
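  # Sketch of a value accepted by _NEVAC_RESULT (hypothetical names): a list
  # of moved instances, a list of failed instances and a list of job sets,
  #   [[["inst1.example.com", "target-group-uuid", ["node3.example.com"]]],
  #    [["inst2.example.com", "disk is not mirrored"]],
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]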
13363

    
13364
  _MODE_DATA = {
13365
    constants.IALLOCATOR_MODE_ALLOC:
13366
      (_AddNewInstance,
13367
       [
13368
        ("name", ht.TString),
13369
        ("memory", ht.TInt),
13370
        ("disks", ht.TListOf(ht.TDict)),
13371
        ("disk_template", ht.TString),
13372
        ("os", ht.TString),
13373
        ("tags", _STRING_LIST),
13374
        ("nics", ht.TListOf(ht.TDict)),
13375
        ("vcpus", ht.TInt),
13376
        ("hypervisor", ht.TString),
13377
        ], ht.TList),
13378
    constants.IALLOCATOR_MODE_RELOC:
13379
      (_AddRelocateInstance,
13380
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13381
       ht.TList),
13382
     constants.IALLOCATOR_MODE_NODE_EVAC:
13383
      (_AddNodeEvacuate, [
13384
        ("instances", _STRING_LIST),
13385
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13386
        ], _NEVAC_RESULT),
13387
     constants.IALLOCATOR_MODE_CHG_GROUP:
13388
      (_AddChangeGroup, [
13389
        ("instances", _STRING_LIST),
13390
        ("target_groups", _STRING_LIST),
13391
        ], _NEVAC_RESULT),
13392
    }
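  # For IALLOCATOR_MODE_ALLOC the serialized request handed to the external
  # script would look roughly like this (hypothetical values):
  #   {"type": "allocate", "name": "inst1.example.com", "memory": 512,
  #    "vcpus": 1, "disks": [{"size": 1024, "mode": "rw"}],
  #    "disk_space_total": 1024, "disk_template": "plain", "os": "dummy-os",
  #    "tags": [], "nics": [], "required_nodes": 1, "hypervisor": "xen-pvm"}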
13393

    
13394
  def Run(self, name, validate=True, call_fn=None):
13395
    """Run an instance allocator and return the results.
13396

13397
    """
13398
    if call_fn is None:
13399
      call_fn = self.rpc.call_iallocator_runner
13400

    
13401
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13402
    result.Raise("Failure while running the iallocator script")
13403

    
13404
    self.out_text = result.payload
13405
    if validate:
13406
      self._ValidateResult()
13407

    
13408
  def _ValidateResult(self):
13409
    """Process the allocator results.
13410

13411
    This will process and if successful save the result in
13412
    self.out_data and the other parameters.
13413

13414
    """
13415
    try:
13416
      rdict = serializer.Load(self.out_text)
13417
    except Exception, err:
13418
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13419

    
13420
    if not isinstance(rdict, dict):
13421
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13422

    
13423
    # TODO: remove backwards compatibility in later versions
13424
    if "nodes" in rdict and "result" not in rdict:
13425
      rdict["result"] = rdict["nodes"]
13426
      del rdict["nodes"]
13427

    
13428
    for key in "success", "info", "result":
13429
      if key not in rdict:
13430
        raise errors.OpExecError("Can't parse iallocator results:"
13431
                                 " missing key '%s'" % key)
13432
      setattr(self, key, rdict[key])
13433

    
13434
    if not self._result_check(self.result):
13435
      raise errors.OpExecError("Iallocator returned invalid result,"
13436
                               " expected %s, got %s" %
13437
                               (self._result_check, self.result),
13438
                               errors.ECODE_INVAL)
13439

    
13440
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13441
      assert self.relocate_from is not None
13442
      assert self.required_nodes == 1
13443

    
13444
      node2group = dict((name, ndata["group"])
13445
                        for (name, ndata) in self.in_data["nodes"].items())
13446

    
13447
      fn = compat.partial(self._NodesToGroups, node2group,
13448
                          self.in_data["nodegroups"])
13449

    
13450
      instance = self.cfg.GetInstanceInfo(self.name)
13451
      request_groups = fn(self.relocate_from + [instance.primary_node])
13452
      result_groups = fn(rdict["result"] + [instance.primary_node])
13453

    
13454
      if self.success and not set(result_groups).issubset(request_groups):
13455
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13456
                                 " differ from original groups (%s)" %
13457
                                 (utils.CommaJoin(result_groups),
13458
                                  utils.CommaJoin(request_groups)))
13459

    
13460
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13461
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13462

    
13463
    self.out_data = rdict
13464

    
13465
  @staticmethod
13466
  def _NodesToGroups(node2group, groups, nodes):
13467
    """Returns a list of unique group names for a list of nodes.
13468

13469
    @type node2group: dict
13470
    @param node2group: Map from node name to group UUID
13471
    @type groups: dict
13472
    @param groups: Group information
13473
    @type nodes: list
13474
    @param nodes: Node names
13475

13476
    """
13477
    result = set()
13478

    
13479
    for node in nodes:
13480
      try:
13481
        group_uuid = node2group[node]
13482
      except KeyError:
13483
        # Ignore unknown node
13484
        pass
13485
      else:
13486
        try:
13487
          group = groups[group_uuid]
13488
        except KeyError:
13489
          # Can't find group, let's use UUID
13490
          group_name = group_uuid
13491
        else:
13492
          group_name = group["name"]
13493

    
13494
        result.add(group_name)
13495

    
13496
    return sorted(result)
13497

    
13498

    
13499
class LUTestAllocator(NoHooksLU):
13500
  """Run allocator tests.
13501

13502
  This LU runs the allocator tests
13503

13504
  """
13505
  def CheckPrereq(self):
13506
    """Check prerequisites.
13507

13508
    This checks the opcode parameters depending on the test direction and mode.
13509

13510
    """
13511
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13512
      for attr in ["memory", "disks", "disk_template",
13513
                   "os", "tags", "nics", "vcpus"]:
13514
        if not hasattr(self.op, attr):
13515
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13516
                                     attr, errors.ECODE_INVAL)
13517
      iname = self.cfg.ExpandInstanceName(self.op.name)
13518
      if iname is not None:
13519
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13520
                                   iname, errors.ECODE_EXISTS)
13521
      if not isinstance(self.op.nics, list):
13522
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13523
                                   errors.ECODE_INVAL)
13524
      if not isinstance(self.op.disks, list):
13525
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13526
                                   errors.ECODE_INVAL)
13527
      for row in self.op.disks:
13528
        if (not isinstance(row, dict) or
13529
            constants.IDISK_SIZE not in row or
13530
            not isinstance(row[constants.IDISK_SIZE], int) or
13531
            constants.IDISK_MODE not in row or
13532
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13533
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13534
                                     " parameter", errors.ECODE_INVAL)
13535
      if self.op.hypervisor is None:
13536
        self.op.hypervisor = self.cfg.GetHypervisorType()
13537
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13538
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13539
      self.op.name = fname
13540
      self.relocate_from = \
13541
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13542
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13543
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13544
      if not self.op.instances:
13545
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13546
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13547
    else:
13548
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13549
                                 self.op.mode, errors.ECODE_INVAL)
13550

    
13551
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13552
      if self.op.allocator is None:
13553
        raise errors.OpPrereqError("Missing allocator name",
13554
                                   errors.ECODE_INVAL)
13555
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13556
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13557
                                 self.op.direction, errors.ECODE_INVAL)
13558

    
13559
  def Exec(self, feedback_fn):
13560
    """Run the allocator test.
13561

13562
    """
13563
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13564
      ial = IAllocator(self.cfg, self.rpc,
13565
                       mode=self.op.mode,
13566
                       name=self.op.name,
13567
                       memory=self.op.memory,
13568
                       disks=self.op.disks,
13569
                       disk_template=self.op.disk_template,
13570
                       os=self.op.os,
13571
                       tags=self.op.tags,
13572
                       nics=self.op.nics,
13573
                       vcpus=self.op.vcpus,
13574
                       hypervisor=self.op.hypervisor,
13575
                       )
13576
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13577
      ial = IAllocator(self.cfg, self.rpc,
13578
                       mode=self.op.mode,
13579
                       name=self.op.name,
13580
                       relocate_from=list(self.relocate_from),
13581
                       )
13582
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13583
      ial = IAllocator(self.cfg, self.rpc,
13584
                       mode=self.op.mode,
13585
                       instances=self.op.instances,
13586
                       target_groups=self.op.target_groups)
13587
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13588
      ial = IAllocator(self.cfg, self.rpc,
13589
                       mode=self.op.mode,
13590
                       instances=self.op.instances,
13591
                       evac_mode=self.op.evac_mode)
13592
    else:
13593
      raise errors.ProgrammerError("Uncatched mode %s in"
13594
                                   " LUTestAllocator.Exec", self.op.mode)
13595

    
13596
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
13597
      result = ial.in_text
13598
    else:
13599
      ial.Run(self.op.allocator, validate=False)
13600
      result = ial.out_text
13601
    return result
13602

    
13603

    
13604
#: Query type implementations
13605
_QUERY_IMPL = {
13606
  constants.QR_INSTANCE: _InstanceQuery,
13607
  constants.QR_NODE: _NodeQuery,
13608
  constants.QR_GROUP: _GroupQuery,
13609
  constants.QR_OS: _OsQuery,
13610
  }
13611

    
13612
assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13613

    
13614

    
13615
def _GetQueryImplementation(name):
13616
  """Returns the implemtnation for a query type.
13617

13618
  @param name: Query type, must be one of L{constants.QR_VIA_OP}
13619

13620
  """
13621
  try:
13622
    return _QUERY_IMPL[name]
13623
  except KeyError:
13624
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13625
                               errors.ECODE_INVAL)