root / lib / cmdlib.py @ 93f2399e

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
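
# Illustrative sketch (hypothetical LU, kept as a comment): an Exec method can
# hand follow-up work to the master daemon by returning ResultWithJobs; each
# inner list becomes one submitted job and extra keyword arguments are passed
# through in the opcode result.
#
#   class LUExampleVerifyAll(NoHooksLU):
#     def Exec(self, feedback_fn):
#       groups = self.cfg.GetNodeGroupList()
#       jobs = [[opcodes.OpClusterVerifyGroup(group_name=group)]
#               for group in groups]
#       return ResultWithJobs(jobs, groups=groups)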
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
137
    self.LogStep = processor.LogStep # pylint: disable=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensure
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separate is better because:
160

161
      - ExpandNames is left as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possibly
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods can no longer worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. No nodes should be returned as an
297
      empty list (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged but any LU can define it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used to send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the "unused argument" and "could
324
    # be a function" warnings
325
    # pylint: disable=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instance's nodes, or
359
    to just lock primaries or secondary nodes, if needed.
360

361
    It should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we've really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
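
# Illustrative sketch (hypothetical LU and opcode slot, kept as a comment): a
# minimal concrete LogicalUnit following the subclass rules from the class
# docstring -- ExpandNames declares locks, CheckPrereq verifies cluster state,
# BuildHooksEnv/BuildHooksNodes feed the hooks runner and Exec does the work.
#
#   class LUExampleNodeNoop(LogicalUnit):
#     HPATH = "node-noop"
#     HTYPE = constants.HTYPE_NODE
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#       self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
#
#     def CheckPrereq(self):
#       _CheckNodeOnline(self, self.op.node_name)
#
#     def BuildHooksEnv(self):
#       return {"OP_TARGET": self.op.node_name}
#
#     def BuildHooksNodes(self):
#       return ([self.op.node_name], [self.op.node_name])
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do on %s" % self.op.node_name)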
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
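
# Illustrative sketch (hypothetical tasklet, kept as a comment): tasklets carry
# the CheckPrereq/Exec pair while the owning LU keeps all locking, and the LU
# wires them up through self.tasklets (usually in ExpandNames).
#
#   class _ExampleNoopTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       self.instance_name = _ExpandInstanceName(self.cfg, self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do for %s" % self.instance_name)
#
#   # in the owning LU:
#   #   self.tasklets = [_ExampleNoopTasklet(self, name) for name in names]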
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
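
# Illustrative sketch (hypothetical class names, kept as a comment): an LU
# typically delegates to a _QueryBase subclass, constructing it in
# CheckArguments and forwarding ExpandNames, DeclareLocks and Exec to it.
#
#   class LUExampleQuery(NoHooksLU):
#     REQ_BGL = False
#
#     def CheckArguments(self):
#       self.eq = _ExampleQuery(qlang.MakeSimpleFilter("name", self.op.names),
#                               self.op.output_fields, self.op.use_locking)
#
#     def ExpandNames(self):
#       self.eq.ExpandNames(self)
#
#     def DeclareLocks(self, level):
#       self.eq.DeclareLocks(self, level)
#
#     def Exec(self, feedback_fn):
#       return self.eq.OldStyleQuery(self)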
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
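
# Illustrative usage (sketch): a read-only LU can declare every lock level as
# shared in its ExpandNames, for example:
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}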
558

    
559

    
560
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561
  """Checks if the owned node groups are still correct for an instance.
562

563
  @type cfg: L{config.ConfigWriter}
564
  @param cfg: The cluster configuration
565
  @type instance_name: string
566
  @param instance_name: Instance name
567
  @type owned_groups: set or frozenset
568
  @param owned_groups: List of currently owned node groups
569

570
  """
571
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
572

    
573
  if not owned_groups.issuperset(inst_groups):
574
    raise errors.OpPrereqError("Instance %s's node groups changed since"
575
                               " locks were acquired, current groups are"
576
                               " are '%s', owning groups '%s'; retry the"
577
                               " operation" %
578
                               (instance_name,
579
                                utils.CommaJoin(inst_groups),
580
                                utils.CommaJoin(owned_groups)),
581
                               errors.ECODE_STATE)
582

    
583
  return inst_groups
584

    
585

    
586
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587
  """Checks if the instances in a node group are still correct.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type group_uuid: string
592
  @param group_uuid: Node group UUID
593
  @type owned_instances: set or frozenset
594
  @param owned_instances: List of currently owned instances
595

596
  """
597
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598
  if owned_instances != wanted_instances:
599
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
600
                               " locks were acquired, wanted '%s', have '%s';"
601
                               " retry the operation" %
602
                               (group_uuid,
603
                                utils.CommaJoin(wanted_instances),
604
                                utils.CommaJoin(owned_instances)),
605
                               errors.ECODE_STATE)
606

    
607
  return wanted_instances
608

    
609

    
610
def _SupportsOob(cfg, node):
611
  """Tells if node supports OOB.
612

613
  @type cfg: L{config.ConfigWriter}
614
  @param cfg: The cluster configuration
615
  @type node: L{objects.Node}
616
  @param node: The node
617
  @return: The OOB script if supported or an empty string otherwise
618

619
  """
620
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
621

    
622

    
623
def _GetWantedNodes(lu, nodes):
624
  """Returns list of checked and expanded node names.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type nodes: list
629
  @param nodes: list of node names or None for all nodes
630
  @rtype: list
631
  @return: the list of nodes, sorted
632
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
633

634
  """
635
  if nodes:
636
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
637

    
638
  return utils.NiceSort(lu.cfg.GetNodeList())
639

    
640

    
641
def _GetWantedInstances(lu, instances):
642
  """Returns list of checked and expanded instance names.
643

644
  @type lu: L{LogicalUnit}
645
  @param lu: the logical unit on whose behalf we execute
646
  @type instances: list
647
  @param instances: list of instance names or None for all instances
648
  @rtype: list
649
  @return: the list of instances, sorted
650
  @raise errors.OpPrereqError: if the instances parameter is wrong type
651
  @raise errors.OpPrereqError: if any of the passed instances is not found
652

653
  """
654
  if instances:
655
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
656
  else:
657
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
658
  return wanted
659

    
660

    
661
def _GetUpdatedParams(old_params, update_dict,
662
                      use_default=True, use_none=False):
663
  """Return the new version of a parameter dictionary.
664

665
  @type old_params: dict
666
  @param old_params: old parameters
667
  @type update_dict: dict
668
  @param update_dict: dict containing new parameter values, or
669
      constants.VALUE_DEFAULT to reset the parameter to its default
670
      value
671
  @type use_default: boolean
672
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673
      values as 'to be deleted' values
674
  @type use_none: boolean
675
  @param use_none: whether to recognise C{None} values as 'to be
676
      deleted' values
677
  @rtype: dict
678
  @return: the new parameter dictionary
679

680
  """
681
  params_copy = copy.deepcopy(old_params)
682
  for key, val in update_dict.iteritems():
683
    if ((use_default and val == constants.VALUE_DEFAULT) or
684
        (use_none and val is None)):
685
      try:
686
        del params_copy[key]
687
      except KeyError:
688
        pass
689
    else:
690
      params_copy[key] = val
691
  return params_copy
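
# Worked example (sketch): resetting one value to the cluster default while
# overriding another. The constants.VALUE_DEFAULT entry is removed so the
# cluster-level default applies again:
#
#   new_params = _GetUpdatedParams({"kernel_path": "/vmlinuz", "acpi": True},
#                                  {"kernel_path": constants.VALUE_DEFAULT,
#                                   "acpi": False})
#   # new_params == {"acpi": False}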
692

    
693

    
694
def _ReleaseLocks(lu, level, names=None, keep=None):
695
  """Releases locks owned by an LU.
696

697
  @type lu: L{LogicalUnit}
698
  @param level: Lock level
699
  @type names: list or None
700
  @param names: Names of locks to release
701
  @type keep: list or None
702
  @param keep: Names of locks to retain
703

704
  """
705
  assert not (keep is not None and names is not None), \
706
         "Only one of the 'names' and the 'keep' parameters can be given"
707

    
708
  if names is not None:
709
    should_release = names.__contains__
710
  elif keep:
711
    should_release = lambda name: name not in keep
712
  else:
713
    should_release = None
714

    
715
  if should_release:
716
    retain = []
717
    release = []
718

    
719
    # Determine which locks to release
720
    for name in lu.owned_locks(level):
721
      if should_release(name):
722
        release.append(name)
723
      else:
724
        retain.append(name)
725

    
726
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
727

    
728
    # Release just some locks
729
    lu.glm.release(level, names=release)
730

    
731
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
732
  else:
733
    # Release everything
734
    lu.glm.release(level)
735

    
736
    assert not lu.glm.is_owned(level), "No locks should be owned"
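
# Illustrative usage (sketch): after an LU has narrowed down the nodes it
# really needs, it can drop the other node locks it still holds, e.g.:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# or release an explicit set instead (only one of 'names'/'keep' may be given):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unused_nodes)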
737

    
738

    
739
def _MapInstanceDisksToNodes(instances):
740
  """Creates a map from (node, volume) to instance name.
741

742
  @type instances: list of L{objects.Instance}
743
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
744

745
  """
746
  return dict(((node, vol), inst.name)
747
              for inst in instances
748
              for (node, vols) in inst.MapLVsByNode().items()
749
              for vol in vols)
750

    
751

    
752
def _RunPostHook(lu, node_name):
753
  """Runs the post-hook for an opcode on a single node.
754

755
  """
756
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
757
  try:
758
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
759
  except:
760
    # pylint: disable=W0702
761
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
762

    
763

    
764
def _CheckOutputFields(static, dynamic, selected):
765
  """Checks whether all selected fields are valid.
766

767
  @type static: L{utils.FieldSet}
768
  @param static: static fields set
769
  @type dynamic: L{utils.FieldSet}
770
  @param dynamic: dynamic fields set
771

772
  """
773
  f = utils.FieldSet()
774
  f.Extend(static)
775
  f.Extend(dynamic)
776

    
777
  delta = f.NonMatching(selected)
778
  if delta:
779
    raise errors.OpPrereqError("Unknown output fields selected: %s"
780
                               % ",".join(delta), errors.ECODE_INVAL)
781

    
782

    
783
def _CheckGlobalHvParams(params):
784
  """Validates that given hypervisor params are not global ones.
785

786
  This will ensure that instances don't get customised versions of
787
  global params.
788

789
  """
790
  used_globals = constants.HVC_GLOBALS.intersection(params)
791
  if used_globals:
792
    msg = ("The following hypervisor parameters are global and cannot"
793
           " be customized at instance level, please modify them at"
794
           " cluster level: %s" % utils.CommaJoin(used_globals))
795
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
796

    
797

    
798
def _CheckNodeOnline(lu, node, msg=None):
799
  """Ensure that a given node is online.
800

801
  @param lu: the LU on behalf of which we make the check
802
  @param node: the node to check
803
  @param msg: if passed, should be a message to replace the default one
804
  @raise errors.OpPrereqError: if the node is offline
805

806
  """
807
  if msg is None:
808
    msg = "Can't use offline node"
809
  if lu.cfg.GetNodeInfo(node).offline:
810
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
811

    
812

    
813
def _CheckNodeNotDrained(lu, node):
814
  """Ensure that a given node is not drained.
815

816
  @param lu: the LU on behalf of which we make the check
817
  @param node: the node to check
818
  @raise errors.OpPrereqError: if the node is drained
819

820
  """
821
  if lu.cfg.GetNodeInfo(node).drained:
822
    raise errors.OpPrereqError("Can't use drained node %s" % node,
823
                               errors.ECODE_STATE)
824

    
825

    
826
def _CheckNodeVmCapable(lu, node):
827
  """Ensure that a given node is vm capable.
828

829
  @param lu: the LU on behalf of which we make the check
830
  @param node: the node to check
831
  @raise errors.OpPrereqError: if the node is not vm capable
832

833
  """
834
  if not lu.cfg.GetNodeInfo(node).vm_capable:
835
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
836
                               errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeHasOS(lu, node, os_name, force_variant):
840
  """Ensure that a node supports a given OS.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @param os_name: the OS to query about
845
  @param force_variant: whether to ignore variant errors
846
  @raise errors.OpPrereqError: if the node is not supporting the OS
847

848
  """
849
  result = lu.rpc.call_os_get(node, os_name)
850
  result.Raise("OS '%s' not in supported OS list for node %s" %
851
               (os_name, node),
852
               prereq=True, ecode=errors.ECODE_INVAL)
853
  if not force_variant:
854
    _CheckOSVariant(result.payload, os_name)
855

    
856

    
857
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858
  """Ensure that a node has the given secondary ip.
859

860
  @type lu: L{LogicalUnit}
861
  @param lu: the LU on behalf of which we make the check
862
  @type node: string
863
  @param node: the node to check
864
  @type secondary_ip: string
865
  @param secondary_ip: the ip to check
866
  @type prereq: boolean
867
  @param prereq: whether to throw a prerequisite or an execute error
868
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
870

871
  """
872
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873
  result.Raise("Failure checking secondary ip on node %s" % node,
874
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
875
  if not result.payload:
876
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877
           " please fix and re-run this command" % secondary_ip)
878
    if prereq:
879
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
880
    else:
881
      raise errors.OpExecError(msg)
882

    
883

    
884
def _GetClusterDomainSecret():
885
  """Reads the cluster domain secret.
886

887
  """
888
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
889
                               strict=True)
890

    
891

    
892
def _CheckInstanceDown(lu, instance, reason):
893
  """Ensure that an instance is not running."""
894
  if instance.admin_up:
895
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896
                               (instance.name, reason), errors.ECODE_STATE)
897

    
898
  pnode = instance.primary_node
899
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
901
              prereq=True, ecode=errors.ECODE_ENVIRON)
902

    
903
  if instance.name in ins_l.payload:
904
    raise errors.OpPrereqError("Instance %s is running, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907

    
908
def _ExpandItemName(fn, name, kind):
909
  """Expand an item name.
910

911
  @param fn: the function to use for expansion
912
  @param name: requested item name
913
  @param kind: text description ('Node' or 'Instance')
914
  @return: the resolved (full) name
915
  @raise errors.OpPrereqError: if the item is not found
916

917
  """
918
  full_name = fn(name)
919
  if full_name is None:
920
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
921
                               errors.ECODE_NOENT)
922
  return full_name
923

    
924

    
925
def _ExpandNodeName(cfg, name):
926
  """Wrapper over L{_ExpandItemName} for nodes."""
927
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
928

    
929

    
930
def _ExpandInstanceName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for instance."""
932
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
933

    
934

    
935
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936
                          memory, vcpus, nics, disk_template, disks,
937
                          bep, hvp, hypervisor_name, tags):
938
  """Builds instance related env variables for hooks
939

940
  This builds the hook environment from individual variables.
941

942
  @type name: string
943
  @param name: the name of the instance
944
  @type primary_node: string
945
  @param primary_node: the name of the instance's primary node
946
  @type secondary_nodes: list
947
  @param secondary_nodes: list of secondary nodes as strings
948
  @type os_type: string
949
  @param os_type: the name of the instance's OS
950
  @type status: boolean
951
  @param status: the should_run status of the instance
952
  @type memory: string
953
  @param memory: the memory size of the instance
954
  @type vcpus: string
955
  @param vcpus: the count of VCPUs the instance has
956
  @type nics: list
957
  @param nics: list of tuples (ip, mac, mode, link) representing
958
      the NICs the instance has
959
  @type disk_template: string
960
  @param disk_template: the disk template of the instance
961
  @type disks: list
962
  @param disks: the list of (size, mode) pairs
963
  @type bep: dict
964
  @param bep: the backend parameters for the instance
965
  @type hvp: dict
966
  @param hvp: the hypervisor parameters for the instance
967
  @type hypervisor_name: string
968
  @param hypervisor_name: the hypervisor for the instance
969
  @type tags: list
970
  @param tags: list of instance tags as strings
971
  @rtype: dict
972
  @return: the hook environment for this instance
973

974
  """
975
  if status:
976
    str_status = "up"
977
  else:
978
    str_status = "down"
979
  env = {
980
    "OP_TARGET": name,
981
    "INSTANCE_NAME": name,
982
    "INSTANCE_PRIMARY": primary_node,
983
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984
    "INSTANCE_OS_TYPE": os_type,
985
    "INSTANCE_STATUS": str_status,
986
    "INSTANCE_MEMORY": memory,
987
    "INSTANCE_VCPUS": vcpus,
988
    "INSTANCE_DISK_TEMPLATE": disk_template,
989
    "INSTANCE_HYPERVISOR": hypervisor_name,
990
  }
991

    
992
  if nics:
993
    nic_count = len(nics)
994
    for idx, (ip, mac, mode, link) in enumerate(nics):
995
      if ip is None:
996
        ip = ""
997
      env["INSTANCE_NIC%d_IP" % idx] = ip
998
      env["INSTANCE_NIC%d_MAC" % idx] = mac
999
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1000
      env["INSTANCE_NIC%d_LINK" % idx] = link
1001
      if mode == constants.NIC_MODE_BRIDGED:
1002
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003
  else:
1004
    nic_count = 0
1005

    
1006
  env["INSTANCE_NIC_COUNT"] = nic_count
1007

    
1008
  if disks:
1009
    disk_count = len(disks)
1010
    for idx, (size, mode) in enumerate(disks):
1011
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1012
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1013
  else:
1014
    disk_count = 0
1015

    
1016
  env["INSTANCE_DISK_COUNT"] = disk_count
1017

    
1018
  if not tags:
1019
    tags = []
1020

    
1021
  env["INSTANCE_TAGS"] = " ".join(tags)
1022

    
1023
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024
    for key, value in source.items():
1025
      env["INSTANCE_%s_%s" % (kind, key)] = value
1026

    
1027
  return env
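
# Illustrative call (sketch, hypothetical values): for an instance with one NIC
# and one disk the resulting environment contains keys such as INSTANCE_NAME,
# INSTANCE_PRIMARY, INSTANCE_STATUS, INSTANCE_NIC0_MAC and INSTANCE_DISK0_SIZE;
# the hooks runner later prefixes every key with "GANETI_".
#
#   env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
#                               "debian-image", True, 128, 1,
#                               [("198.51.100.1", "aa:00:00:00:00:01",
#                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
#                               constants.DT_PLAIN, [(1024, "rw")],
#                               {}, {}, constants.HT_XEN_PVM, ["example-tag"])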
1028

    
1029

    
1030
def _NICListToTuple(lu, nics):
1031
  """Build a list of nic information tuples.
1032

1033
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034
  value in LUInstanceQueryData.
1035

1036
  @type lu:  L{LogicalUnit}
1037
  @param lu: the logical unit on whose behalf we execute
1038
  @type nics: list of L{objects.NIC}
1039
  @param nics: list of nics to convert to hooks tuples
1040

1041
  """
1042
  hooks_nics = []
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  for nic in nics:
1045
    ip = nic.ip
1046
    mac = nic.mac
1047
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048
    mode = filled_params[constants.NIC_MODE]
1049
    link = filled_params[constants.NIC_LINK]
1050
    hooks_nics.append((ip, mac, mode, link))
1051
  return hooks_nics
1052

    
1053

    
1054
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055
  """Builds instance related env variables for hooks from an object.
1056

1057
  @type lu: L{LogicalUnit}
1058
  @param lu: the logical unit on whose behalf we execute
1059
  @type instance: L{objects.Instance}
1060
  @param instance: the instance for which we should build the
1061
      environment
1062
  @type override: dict
1063
  @param override: dictionary with key/values that will override
1064
      our values
1065
  @rtype: dict
1066
  @return: the hook environment dictionary
1067

1068
  """
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  bep = cluster.FillBE(instance)
1071
  hvp = cluster.FillHV(instance)
1072
  args = {
1073
    "name": instance.name,
1074
    "primary_node": instance.primary_node,
1075
    "secondary_nodes": instance.secondary_nodes,
1076
    "os_type": instance.os,
1077
    "status": instance.admin_up,
1078
    "memory": bep[constants.BE_MEMORY],
1079
    "vcpus": bep[constants.BE_VCPUS],
1080
    "nics": _NICListToTuple(lu, instance.nics),
1081
    "disk_template": instance.disk_template,
1082
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083
    "bep": bep,
1084
    "hvp": hvp,
1085
    "hypervisor_name": instance.hypervisor,
1086
    "tags": instance.tags,
1087
  }
1088
  if override:
1089
    args.update(override)
1090
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1091

    
1092

    
1093
def _AdjustCandidatePool(lu, exceptions):
1094
  """Adjust the candidate pool after node operations.
1095

1096
  """
1097
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098
  if mod_list:
1099
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1100
               utils.CommaJoin(node.name for node in mod_list))
1101
    for name in mod_list:
1102
      lu.context.ReaddNode(name)
1103
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104
  if mc_now > mc_max:
1105
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106
               (mc_now, mc_max))
1107

    
1108

    
1109
def _DecideSelfPromotion(lu, exceptions=None):
1110
  """Decide whether I should promote myself as a master candidate.
1111

1112
  """
1113
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115
  # the new node will increase mc_max by one, so:
1116
  mc_should = min(mc_should + 1, cp_size)
1117
  return mc_now < mc_should
1118

    
1119

    
1120
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121
  """Check that the brigdes needed by a list of nics exist.
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
1135
  """Check that the brigdes needed by an instance exist.
1136

1137
  """
1138
  if node is None:
1139
    node = instance.primary_node
1140
  _CheckNicsBridgesExist(lu, instance.nics, node)
1141

    
1142

    
1143
def _CheckOSVariant(os_obj, name):
1144
  """Check whether an OS name conforms to the os variants specification.
1145

1146
  @type os_obj: L{objects.OS}
1147
  @param os_obj: OS object to check
1148
  @type name: string
1149
  @param name: OS name passed by the user, to check for validity
1150

1151
  """
1152
  variant = objects.OS.GetVariant(name)
1153
  if not os_obj.supported_variants:
1154
    if variant:
1155
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156
                                 " passed)" % (os_obj.name, variant),
1157
                                 errors.ECODE_INVAL)
1158
    return
1159
  if not variant:
1160
    raise errors.OpPrereqError("OS name must include a variant",
1161
                               errors.ECODE_INVAL)
1162

    
1163
  if variant not in os_obj.supported_variants:
1164
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165

    
1166

    
1167
def _GetNodeInstancesInner(cfg, fn):
1168
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169

    
1170

    
1171
def _GetNodeInstances(cfg, node_name):
1172
  """Returns a list of all primary and secondary instances on a node.
1173

1174
  """
1175

    
1176
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177

    
1178

    
1179
def _GetNodePrimaryInstances(cfg, node_name):
1180
  """Returns primary instances on a node.
1181

1182
  """
1183
  return _GetNodeInstancesInner(cfg,
1184
                                lambda inst: node_name == inst.primary_node)
1185

    
1186

    
1187
def _GetNodeSecondaryInstances(cfg, node_name):
1188
  """Returns secondary instances on a node.
1189

1190
  """
1191
  return _GetNodeInstancesInner(cfg,
1192
                                lambda inst: node_name in inst.secondary_nodes)
1193

    
1194

    
1195
def _GetStorageTypeArgs(cfg, storage_type):
1196
  """Returns the arguments for a storage type.
1197

1198
  """
1199
  # Special case for file storage
1200
  if storage_type == constants.ST_FILE:
1201
    # storage.FileStorage wants a list of storage directories
1202
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203

    
1204
  return []
1205

    
1206

    
1207
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
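  """Returns the indices of an instance's disks that are reported as faulty.

  Queries the mirror status of C{instance}'s disks on C{node_name} and
  collects the indices whose ldisk status is L{constants.LDS_FAULTY}.

  """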
1208
  faulty = []
1209

    
1210
  for dev in instance.disks:
1211
    cfg.SetDiskID(dev, node_name)
1212

    
1213
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214
  result.Raise("Failed to get disk status from node %s" % node_name,
1215
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216

    
1217
  for idx, bdev_status in enumerate(result.payload):
1218
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219
      faulty.append(idx)
1220

    
1221
  return faulty
1222

    
1223

    
1224
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225
  """Check the sanity of iallocator and node arguments and use the
1226
  cluster-wide iallocator if appropriate.
1227

1228
  Check that at most one of (iallocator, node) is specified. If none is
1229
  specified, then the LU's opcode's iallocator slot is filled with the
1230
  cluster-wide default iallocator.
1231

1232
  @type iallocator_slot: string
1233
  @param iallocator_slot: the name of the opcode iallocator slot
1234
  @type node_slot: string
1235
  @param node_slot: the name of the opcode target node slot
1236

1237
  """
1238
  node = getattr(lu.op, node_slot, None)
1239
  iallocator = getattr(lu.op, iallocator_slot, None)
1240

    
1241
  if node is not None and iallocator is not None:
1242
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243
                               errors.ECODE_INVAL)
1244
  elif node is None and iallocator is None:
1245
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1246
    if default_iallocator:
1247
      setattr(lu.op, iallocator_slot, default_iallocator)
1248
    else:
1249
      raise errors.OpPrereqError("No iallocator or node given and no"
1250
                                 " cluster-wide default iallocator found;"
1251
                                 " please specify either an iallocator or a"
1252
                                 " node, or set a cluster-wide default"
1253
                                 " iallocator")
1254

    
1255

    
1256
def _GetDefaultIAllocator(cfg, iallocator):
1257
  """Decides on which iallocator to use.
1258

1259
  @type cfg: L{config.ConfigWriter}
1260
  @param cfg: Cluster configuration object
1261
  @type iallocator: string or None
1262
  @param iallocator: Iallocator specified in opcode
1263
  @rtype: string
1264
  @return: Iallocator name
1265

1266
  """
1267
  if not iallocator:
1268
    # Use default iallocator
1269
    iallocator = cfg.GetDefaultIAllocator()
1270

    
1271
  if not iallocator:
1272
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273
                               " opcode nor as a cluster-wide default",
1274
                               errors.ECODE_INVAL)
1275

    
1276
  return iallocator
1277

    
1278

    
1279
class LUClusterPostInit(LogicalUnit):
1280
  """Logical unit for running hooks after cluster initialization.
1281

1282
  """
1283
  HPATH = "cluster-init"
1284
  HTYPE = constants.HTYPE_CLUSTER
1285

    
1286
  def BuildHooksEnv(self):
1287
    """Build hooks env.
1288

1289
    """
1290
    return {
1291
      "OP_TARGET": self.cfg.GetClusterName(),
1292
      }
1293

    
1294
  def BuildHooksNodes(self):
1295
    """Build hooks nodes.
1296

1297
    """
1298
    return ([], [self.cfg.GetMasterNode()])
1299

    
1300
  def Exec(self, feedback_fn):
1301
    """Nothing to do.
1302

1303
    """
1304
    return True
1305

    
1306

    
1307
class LUClusterDestroy(LogicalUnit):
1308
  """Logical unit for destroying the cluster.
1309

1310
  """
1311
  HPATH = "cluster-destroy"
1312
  HTYPE = constants.HTYPE_CLUSTER
1313

    
1314
  def BuildHooksEnv(self):
1315
    """Build hooks env.
1316

1317
    """
1318
    return {
1319
      "OP_TARGET": self.cfg.GetClusterName(),
1320
      }
1321

    
1322
  def BuildHooksNodes(self):
1323
    """Build hooks nodes.
1324

1325
    """
1326
    return ([], [])
1327

    
1328
  def CheckPrereq(self):
1329
    """Check prerequisites.
1330

1331
    This checks whether the cluster is empty.
1332

1333
    Any errors are signaled by raising errors.OpPrereqError.
1334

1335
    """
1336
    master = self.cfg.GetMasterNode()
1337

    
1338
    nodelist = self.cfg.GetNodeList()
1339
    if len(nodelist) != 1 or nodelist[0] != master:
1340
      raise errors.OpPrereqError("There are still %d node(s) in"
1341
                                 " this cluster." % (len(nodelist) - 1),
1342
                                 errors.ECODE_INVAL)
1343
    instancelist = self.cfg.GetInstanceList()
1344
    if instancelist:
1345
      raise errors.OpPrereqError("There are still %d instance(s) in"
1346
                                 " this cluster." % len(instancelist),
1347
                                 errors.ECODE_INVAL)
1348

    
1349
  def Exec(self, feedback_fn):
1350
    """Destroys the cluster.
1351

1352
    """
1353
    master = self.cfg.GetMasterNode()
1354

    
1355
    # Run post hooks on master node before it's removed
1356
    _RunPostHook(self, master)
1357

    
1358
    result = self.rpc.call_node_deactivate_master_ip(master)
1359
    result.Raise("Could not disable the master role")
1360

    
1361
    return master
1362

    
1363

    
1364
def _VerifyCertificate(filename):
1365
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1366

1367
  @type filename: string
1368
  @param filename: Path to PEM file
1369

1370
  """
1371
  try:
1372
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373
                                           utils.ReadFile(filename))
1374
  except Exception, err: # pylint: disable=W0703
1375
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1376
            "Failed to load X509 certificate %s: %s" % (filename, err))
1377

    
1378
  (errcode, msg) = \
1379
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380
                                constants.SSL_CERT_EXPIRATION_ERROR)
1381

    
1382
  if msg:
1383
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1384
  else:
1385
    fnamemsg = None
1386

    
1387
  if errcode is None:
1388
    return (None, fnamemsg)
1389
  elif errcode == utils.CERT_WARNING:
1390
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391
  elif errcode == utils.CERT_ERROR:
1392
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393

    
1394
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395

    
1396

    
1397
def _GetAllHypervisorParameters(cluster, instances):
1398
  """Compute the set of all hypervisor parameters.
1399

1400
  @type cluster: L{objects.Cluster}
1401
  @param cluster: the cluster object
1402
  @type instances: list of L{objects.Instance}
1403
  @param instances: additional instances from which to obtain parameters
1404
  @rtype: list of (origin, hypervisor, parameters)
1405
  @return: a list with all parameters found, indicating the hypervisor they
1406
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407

1408
  """
1409
  hvp_data = []
1410

    
1411
  for hv_name in cluster.enabled_hypervisors:
1412
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413

    
1414
  for os_name, os_hvp in cluster.os_hvp.items():
1415
    for hv_name, hv_params in os_hvp.items():
1416
      if hv_params:
1417
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419

    
1420
  # TODO: collapse identical parameter values in a single one
1421
  for instance in instances:
1422
    if instance.hvparams:
1423
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424
                       cluster.FillHV(instance)))
1425

    
1426
  return hvp_data
1427

    
1428

    
1429
class _VerifyErrors(object):
1430
  """Mix-in for cluster/group verify LUs.
1431

1432
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433
  self.op and self._feedback_fn to be available.)
1434

1435
  """
1436

    
1437
  ETYPE_FIELD = "code"
1438
  ETYPE_ERROR = "ERROR"
1439
  ETYPE_WARNING = "WARNING"
1440

    
1441
  def _Error(self, ecode, item, msg, *args, **kwargs):
1442
    """Format an error message.
1443

1444
    Based on the opcode's error_codes parameter, either format a
1445
    parseable error code, or a simpler error string.
1446

1447
    This must be called only from Exec and functions called from Exec.
1448

1449
    """
1450
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1451
    itype, etxt = ecode
1452
    # first complete the msg
1453
    if args:
1454
      msg = msg % args
1455
    # then format the whole message
1456
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1457
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1458
    else:
1459
      if item:
1460
        item = " " + item
1461
      else:
1462
        item = ""
1463
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1464
    # and finally report it via the feedback_fn
1465
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1466

    
1467
  def _ErrorIf(self, cond, *args, **kwargs):
1468
    """Log an error message if the passed condition is True.
1469

1470
    """
1471
    cond = (bool(cond)
1472
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1473
    if cond:
1474
      self._Error(*args, **kwargs)
1475
    # do not mark the operation as failed when only warnings were raised
1476
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1477
      self.bad = self.bad or cond
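
# Illustrative usage (sketch, mirroring the calls made by the verify LUs
# below): inside Exec, problems are reported through the mix-in rather than
# raised, so a single run can collect many of them; WARNING-level entries do
# not mark the operation as failed.
#
#   self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
#                 None, "the following nodes are dangling: %s",
#                 utils.CommaJoin(dangling_nodes))
#   self._ErrorIf(bool(msg), constants.CV_ECLUSTERCERT, None, msg,
#                 code=self.ETYPE_WARNING)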
1478

    
1479

    
1480
class LUClusterVerify(NoHooksLU):
1481
  """Submits all jobs necessary to verify the cluster.
1482

1483
  """
1484
  REQ_BGL = False
1485

    
1486
  def ExpandNames(self):
1487
    self.needed_locks = {}
1488

    
1489
  def Exec(self, feedback_fn):
1490
    jobs = []
1491

    
1492
    if self.op.group_name:
1493
      groups = [self.op.group_name]
1494
      depends_fn = lambda: None
1495
    else:
1496
      groups = self.cfg.GetNodeGroupList()
1497

    
1498
      # Verify global configuration
1499
      jobs.append([
1500
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1501
        ])
1502

    
1503
      # Always depend on global verification
1504
      depends_fn = lambda: [(-len(jobs), [])]
1505

    
1506
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1507
                                            ignore_errors=self.op.ignore_errors,
1508
                                            depends=depends_fn())]
1509
                for group in groups)
1510

    
1511
    # Fix up all parameters
1512
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1513
      op.debug_simulate_errors = self.op.debug_simulate_errors
1514
      op.verbose = self.op.verbose
1515
      op.error_codes = self.op.error_codes
1516
      try:
1517
        op.skip_checks = self.op.skip_checks
1518
      except AttributeError:
1519
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1520

    
1521
    return ResultWithJobs(jobs)
1522

    
1523

    
1524
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1525
  """Verifies the cluster config.
1526

1527
  """
1528
  REQ_BGL = True
1529

    
1530
  def _VerifyHVP(self, hvp_data):
1531
    """Verifies locally the syntax of the hypervisor parameters.
1532

1533
    """
1534
    for item, hv_name, hv_params in hvp_data:
1535
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1536
             (hv_name, item))
1537
      try:
1538
        hv_class = hypervisor.GetHypervisor(hv_name)
1539
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1540
        hv_class.CheckParameterSyntax(hv_params)
1541
      except errors.GenericError, err:
1542
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1543

    
1544
  def ExpandNames(self):
1545
    # Information can be safely retrieved as the BGL is acquired in exclusive
1546
    # mode
1547
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1548
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1549
    self.all_node_info = self.cfg.GetAllNodesInfo()
1550
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1551
    self.needed_locks = {}
1552

    
1553
  def Exec(self, feedback_fn):
1554
    """Verify integrity of cluster, performing various test on nodes.
1555

1556
    """
1557
    self.bad = False
1558
    self._feedback_fn = feedback_fn
1559

    
1560
    feedback_fn("* Verifying cluster config")
1561

    
1562
    for msg in self.cfg.VerifyConfig():
1563
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1564

    
1565
    feedback_fn("* Verifying cluster certificate files")
1566

    
1567
    for cert_filename in constants.ALL_CERT_FILES:
1568
      (errcode, msg) = _VerifyCertificate(cert_filename)
1569
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1570

    
1571
    feedback_fn("* Verifying hypervisor parameters")
1572

    
1573
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1574
                                                self.all_inst_info.values()))
1575

    
1576
    feedback_fn("* Verifying all nodes belong to an existing group")
1577

    
1578
    # We do this verification here because, should this bogus circumstance
1579
    # occur, it would never be caught by VerifyGroup, which only acts on
1580
    # nodes/instances reachable from existing node groups.
1581

    
1582
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1583
                         if node.group not in self.all_group_info)
1584

    
1585
    dangling_instances = {}
1586
    no_node_instances = []
1587

    
1588
    for inst in self.all_inst_info.values():
1589
      if inst.primary_node in dangling_nodes:
1590
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1591
      elif inst.primary_node not in self.all_node_info:
1592
        no_node_instances.append(inst.name)
1593

    
1594
    pretty_dangling = [
1595
        "%s (%s)" %
1596
        (node.name,
1597
         utils.CommaJoin(dangling_instances.get(node.name,
1598
                                                ["no instances"])))
1599
        for node in dangling_nodes]
1600

    
1601
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1602
                  None,
1603
                  "the following nodes (and their instances) belong to a non"
1604
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1605

    
1606
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1607
                  None,
1608
                  "the following instances have a non-existing primary-node:"
1609
                  " %s", utils.CommaJoin(no_node_instances))
1610

    
1611
    return not self.bad
1612

    
1613

    
1614
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1615
  """Verifies the status of a node group.
1616

1617
  """
1618
  HPATH = "cluster-verify"
1619
  HTYPE = constants.HTYPE_CLUSTER
1620
  REQ_BGL = False
1621

    
1622
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1623

    
1624
  class NodeImage(object):
1625
    """A class representing the logical and physical status of a node.
1626

1627
    @type name: string
1628
    @ivar name: the node name to which this object refers
1629
    @ivar volumes: a structure as returned from
1630
        L{ganeti.backend.GetVolumeList} (runtime)
1631
    @ivar instances: a list of running instances (runtime)
1632
    @ivar pinst: list of configured primary instances (config)
1633
    @ivar sinst: list of configured secondary instances (config)
1634
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1635
        instances for which this node is secondary (config)
1636
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1637
    @ivar dfree: free disk, as reported by the node (runtime)
1638
    @ivar offline: the offline status (config)
1639
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
1642
    @type lvm_fail: boolean
1643
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1644
    @type hyp_fail: boolean
1645
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1646
    @type ghost: boolean
1647
    @ivar ghost: whether this is a known node or not (config)
1648
    @type os_fail: boolean
1649
    @ivar os_fail: whether the RPC call didn't return valid OS data
1650
    @type oslist: list
1651
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1652
    @type vm_capable: boolean
1653
    @ivar vm_capable: whether the node can host instances
1654

1655
    """
1656
    def __init__(self, offline=False, name=None, vm_capable=True):
1657
      self.name = name
1658
      self.volumes = {}
1659
      self.instances = []
1660
      self.pinst = []
1661
      self.sinst = []
1662
      self.sbp = {}
1663
      self.mfree = 0
1664
      self.dfree = 0
1665
      self.offline = offline
1666
      self.vm_capable = vm_capable
1667
      self.rpc_fail = False
1668
      self.lvm_fail = False
1669
      self.hyp_fail = False
1670
      self.ghost = False
1671
      self.os_fail = False
1672
      self.oslist = {}
1673

    
1674
  def ExpandNames(self):
1675
    # This raises errors.OpPrereqError on its own:
1676
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1677

    
1678
    # Get instances in node group; this is unsafe and needs verification later
1679
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1680

    
1681
    self.needed_locks = {
1682
      locking.LEVEL_INSTANCE: inst_names,
1683
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1684
      locking.LEVEL_NODE: [],
1685
      }
1686

    
1687
    self.share_locks = _ShareAll()
1688

    
1689
  def DeclareLocks(self, level):
1690
    if level == locking.LEVEL_NODE:
1691
      # Get members of node group; this is unsafe and needs verification later
1692
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1693

    
1694
      all_inst_info = self.cfg.GetAllInstancesInfo()
1695

    
1696
      # In Exec(), we warn about mirrored instances that have primary and
1697
      # secondary living in separate node groups. To fully verify that
1698
      # volumes for these instances are healthy, we will need to do an
1699
      # extra call to their secondaries. We ensure here those nodes will
1700
      # be locked.
1701
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1702
        # Important: access only the instances whose lock is owned
1703
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1704
          nodes.update(all_inst_info[inst].secondary_nodes)
1705

    
1706
      self.needed_locks[locking.LEVEL_NODE] = nodes
1707

    
1708
  def CheckPrereq(self):
1709
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1710
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1711

    
1712
    group_nodes = set(self.group_info.members)
1713
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1714

    
1715
    unlocked_nodes = \
1716
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1717

    
1718
    unlocked_instances = \
1719
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1720

    
1721
    if unlocked_nodes:
1722
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1723
                                 utils.CommaJoin(unlocked_nodes))
1724

    
1725
    if unlocked_instances:
1726
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1727
                                 utils.CommaJoin(unlocked_instances))
1728

    
1729
    self.all_node_info = self.cfg.GetAllNodesInfo()
1730
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1731

    
1732
    self.my_node_names = utils.NiceSort(group_nodes)
1733
    self.my_inst_names = utils.NiceSort(group_instances)
1734

    
1735
    self.my_node_info = dict((name, self.all_node_info[name])
1736
                             for name in self.my_node_names)
1737

    
1738
    self.my_inst_info = dict((name, self.all_inst_info[name])
1739
                             for name in self.my_inst_names)
1740

    
1741
    # We detect here the nodes that will need the extra RPC calls for verifying
1742
    # split LV volumes; they should be locked.
1743
    extra_lv_nodes = set()
1744

    
1745
    for inst in self.my_inst_info.values():
1746
      if inst.disk_template in constants.DTS_INT_MIRROR:
1747
        group = self.my_node_info[inst.primary_node].group
1748
        for nname in inst.secondary_nodes:
1749
          if self.all_node_info[nname].group != group:
1750
            extra_lv_nodes.add(nname)
1751

    
1752
    unlocked_lv_nodes = \
1753
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1754

    
1755
    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes must be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
1758
    self.extra_lv_nodes = list(extra_lv_nodes)
1759

    
1760
  def _VerifyNode(self, ninfo, nresult):
1761
    """Perform some basic validation on data returned from a node.
1762

1763
      - check the result data structure is well formed and has all the
1764
        mandatory fields
1765
      - check ganeti version
1766

1767
    @type ninfo: L{objects.Node}
1768
    @param ninfo: the node to check
1769
    @param nresult: the results from the node
1770
    @rtype: boolean
1771
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

1774
    """
1775
    node = ninfo.name
1776
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1777

    
1778
    # main result, nresult should be a non-empty dict
1779
    test = not nresult or not isinstance(nresult, dict)
1780
    _ErrorIf(test, constants.CV_ENODERPC, node,
1781
                  "unable to verify node: no data returned")
1782
    if test:
1783
      return False
1784

    
1785
    # compares ganeti version
1786
    local_version = constants.PROTOCOL_VERSION
1787
    remote_version = nresult.get("version", None)
1788
    test = not (remote_version and
1789
                isinstance(remote_version, (list, tuple)) and
1790
                len(remote_version) == 2)
1791
    _ErrorIf(test, constants.CV_ENODERPC, node,
1792
             "connection to node returned invalid data")
1793
    if test:
1794
      return False
1795

    
1796
    test = local_version != remote_version[0]
1797
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1798
             "incompatible protocol versions: master %s,"
1799
             " node %s", local_version, remote_version[0])
1800
    if test:
1801
      return False
1802

    
1803
    # node seems compatible, we can actually try to look into its results
1804

    
1805
    # full package version
1806
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1807
                  constants.CV_ENODEVERSION, node,
1808
                  "software version mismatch: master %s, node %s",
1809
                  constants.RELEASE_VERSION, remote_version[1],
1810
                  code=self.ETYPE_WARNING)
1811

    
1812
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1813
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1814
      for hv_name, hv_result in hyp_result.iteritems():
1815
        test = hv_result is not None
1816
        _ErrorIf(test, constants.CV_ENODEHV, node,
1817
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1818

    
1819
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1820
    if ninfo.vm_capable and isinstance(hvp_result, list):
1821
      for item, hv_name, hv_result in hvp_result:
1822
        _ErrorIf(True, constants.CV_ENODEHV, node,
1823
                 "hypervisor %s parameter verify failure (source %s): %s",
1824
                 hv_name, item, hv_result)
1825

    
1826
    test = nresult.get(constants.NV_NODESETUP,
1827
                       ["Missing NODESETUP results"])
1828
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1829
             "; ".join(test))
1830

    
1831
    return True
1832

    
1833
  def _VerifyNodeTime(self, ninfo, nresult,
1834
                      nvinfo_starttime, nvinfo_endtime):
1835
    """Check the node time.
1836

1837
    @type ninfo: L{objects.Node}
1838
    @param ninfo: the node to check
1839
    @param nresult: the remote results for the node
1840
    @param nvinfo_starttime: the start time of the RPC call
1841
    @param nvinfo_endtime: the end time of the RPC call
1842

1843
    """
1844
    node = ninfo.name
1845
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1846

    
1847
    ntime = nresult.get(constants.NV_TIME, None)
1848
    try:
1849
      ntime_merged = utils.MergeTime(ntime)
1850
    except (ValueError, TypeError):
1851
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1852
      return
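
    # The node's reported time must fall inside the window spanned by this
    # RPC call, widened by NODE_MAX_CLOCK_SKEW on both sides; anything
    # outside that window is reported as clock drift.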
1853

    
1854
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1855
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1856
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1857
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1858
    else:
1859
      ntime_diff = None
1860

    
1861
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1862
             "Node time diverges by at least %s from master node time",
1863
             ntime_diff)
1864

    
1865
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1866
    """Check the node LVM results.
1867

1868
    @type ninfo: L{objects.Node}
1869
    @param ninfo: the node to check
1870
    @param nresult: the remote results for the node
1871
    @param vg_name: the configured VG name
1872

1873
    """
1874
    if vg_name is None:
1875
      return
1876

    
1877
    node = ninfo.name
1878
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1879

    
1880
    # checks vg existence and size > 20G
1881
    vglist = nresult.get(constants.NV_VGLIST, None)
1882
    test = not vglist
1883
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1884
    if not test:
1885
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1886
                                            constants.MIN_VG_SIZE)
1887
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1888

    
1889
    # check pv names
1890
    pvlist = nresult.get(constants.NV_PVLIST, None)
1891
    test = pvlist is None
1892
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1893
    if not test:
1894
      # check that ':' is not present in PV names, since it's a
1895
      # special character for lvcreate (denotes the range of PEs to
1896
      # use on the PV)
1897
      for _, pvname, owner_vg in pvlist:
1898
        test = ":" in pvname
1899
        _ErrorIf(test, constants.CV_ENODELVM, node,
1900
                 "Invalid character ':' in PV '%s' of VG '%s'",
1901
                 pvname, owner_vg)
1902

    
1903
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1904
    """Check the node bridges.
1905

1906
    @type ninfo: L{objects.Node}
1907
    @param ninfo: the node to check
1908
    @param nresult: the remote results for the node
1909
    @param bridges: the expected list of bridges
1910

1911
    """
1912
    if not bridges:
1913
      return
1914

    
1915
    node = ninfo.name
1916
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1917

    
1918
    missing = nresult.get(constants.NV_BRIDGES, None)
1919
    test = not isinstance(missing, list)
1920
    _ErrorIf(test, constants.CV_ENODENET, node,
1921
             "did not return valid bridge information")
1922
    if not test:
1923
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1924
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1925

    
1926
  def _VerifyNodeNetwork(self, ninfo, nresult):
1927
    """Check the node network connectivity results.
1928

1929
    @type ninfo: L{objects.Node}
1930
    @param ninfo: the node to check
1931
    @param nresult: the remote results for the node
1932

1933
    """
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936

    
1937
    test = constants.NV_NODELIST not in nresult
1938
    _ErrorIf(test, constants.CV_ENODESSH, node,
1939
             "node hasn't returned node ssh connectivity data")
1940
    if not test:
1941
      if nresult[constants.NV_NODELIST]:
1942
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1943
          _ErrorIf(True, constants.CV_ENODESSH, node,
1944
                   "ssh communication with node '%s': %s", a_node, a_msg)
1945

    
1946
    test = constants.NV_NODENETTEST not in nresult
1947
    _ErrorIf(test, constants.CV_ENODENET, node,
1948
             "node hasn't returned node tcp connectivity data")
1949
    if not test:
1950
      if nresult[constants.NV_NODENETTEST]:
1951
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1952
        for anode in nlist:
1953
          _ErrorIf(True, constants.CV_ENODENET, node,
1954
                   "tcp communication with node '%s': %s",
1955
                   anode, nresult[constants.NV_NODENETTEST][anode])
1956

    
1957
    test = constants.NV_MASTERIP not in nresult
1958
    _ErrorIf(test, constants.CV_ENODENET, node,
1959
             "node hasn't returned node master IP reachability data")
1960
    if not test:
1961
      if not nresult[constants.NV_MASTERIP]:
1962
        if node == self.master_node:
1963
          msg = "the master node cannot reach the master IP (not configured?)"
1964
        else:
1965
          msg = "cannot reach the master IP"
1966
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
1967

    
1968
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1969
                      diskstatus):
1970
    """Verify an instance.
1971

1972
    This function checks to see if the required block devices are
1973
    available on the instance's node.
1974

1975
    """
1976
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1977
    node_current = instanceconfig.primary_node
1978

    
1979
    node_vol_should = {}
1980
    instanceconfig.MapLVsByNode(node_vol_should)
1981

    
1982
    for node in node_vol_should:
1983
      n_img = node_image[node]
1984
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1985
        # ignore missing volumes on offline or broken nodes
1986
        continue
1987
      for volume in node_vol_should[node]:
1988
        test = volume not in n_img.volumes
1989
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
1990
                 "volume %s missing on node %s", volume, node)
1991

    
1992
    if instanceconfig.admin_up:
1993
      pri_img = node_image[node_current]
1994
      test = instance not in pri_img.instances and not pri_img.offline
1995
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
1996
               "instance not running on its primary node %s",
1997
               node_current)
1998

    
1999
    diskdata = [(nname, success, status, idx)
2000
                for (nname, disks) in diskstatus.items()
2001
                for idx, (success, status) in enumerate(disks)]
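    # diskdata flattens the per-node status dict into (node, success,
    # status, disk_index) tuples, one entry per disk.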
2002

    
2003
    for nname, success, bdev_status, idx in diskdata:
2004
      # the 'ghost node' construction in Exec() ensures that we have a
2005
      # node here
2006
      snode = node_image[nname]
2007
      bad_snode = snode.ghost or snode.offline
2008
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2009
               constants.CV_EINSTANCEFAULTYDISK, instance,
2010
               "couldn't retrieve status for disk/%s on %s: %s",
2011
               idx, nname, bdev_status)
2012
      _ErrorIf((instanceconfig.admin_up and success and
2013
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2014
               constants.CV_EINSTANCEFAULTYDISK, instance,
2015
               "disk/%s on %s is faulty", idx, nname)
2016

    
2017
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2018
    """Verify if there are any unknown volumes in the cluster.
2019

2020
    The .os, .swap and backup volumes are ignored. All other volumes are
2021
    reported as unknown.
2022

2023
    @type reserved: L{ganeti.utils.FieldSet}
2024
    @param reserved: a FieldSet of reserved volume names
2025

2026
    """
2027
    for node, n_img in node_image.items():
2028
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2029
        # skip non-healthy nodes
2030
        continue
2031
      for volume in n_img.volumes:
2032
        test = ((node not in node_vol_should or
2033
                volume not in node_vol_should[node]) and
2034
                not reserved.Matches(volume))
2035
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2036
                      "volume %s is unknown", volume)
2037

    
2038
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2039
    """Verify N+1 Memory Resilience.
2040

2041
    Check that if one single node dies we can still start all the
2042
    instances it was primary for.
2043

2044
    """
2045
    cluster_info = self.cfg.GetClusterInfo()
2046
    for node, n_img in node_image.items():
2047
      # This code checks that every node which is now listed as
2048
      # secondary has enough memory to host all instances it is
      # supposed to, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
2051
      # FIXME: does not support file-backed instances
2052
      # WARNING: we currently take into account down instances as well
2053
      # as up ones, considering that even if they're down someone
2054
      # might want to start them even in the event of a node failure.
2055
      if n_img.offline:
2056
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
2059
        # enough warning
2060
        continue
2061
      for prinode, instances in n_img.sbp.items():
2062
        needed_mem = 0
2063
        for instance in instances:
2064
          bep = cluster_info.FillBE(instance_cfg[instance])
2065
          if bep[constants.BE_AUTO_BALANCE]:
2066
            needed_mem += bep[constants.BE_MEMORY]
2067
        test = n_img.mfree < needed_mem
2068
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
2071
                      prinode, needed_mem, n_img.mfree)
2072

    
2073
  @classmethod
2074
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2075
                   (files_all, files_all_opt, files_mc, files_vm)):
2076
    """Verifies file checksums collected from all nodes.
2077

2078
    @param errorif: Callback for reporting errors
2079
    @param nodeinfo: List of L{objects.Node} objects
2080
    @param master_node: Name of master node
2081
    @param all_nvinfo: RPC results
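    @param files_all: set of files that must exist on all nodes
    @param files_all_opt: set of optional files (must exist on all or no nodes)
    @param files_mc: set of files expected only on master candidates (and the
        master node itself)
    @param files_vm: set of files expected only on vm-capable nodes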
2082

2083
    """
2084
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2085
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2086
           "Found file listed in more than one file list"
2087

    
2088
    # Define functions determining which nodes to consider for a file
2089
    files2nodefn = [
2090
      (files_all, None),
2091
      (files_all_opt, None),
2092
      (files_mc, lambda node: (node.master_candidate or
2093
                               node.name == master_node)),
2094
      (files_vm, lambda node: node.vm_capable),
2095
      ]
2096

    
2097
    # Build mapping from filename to list of nodes which should have the file
2098
    nodefiles = {}
2099
    for (files, fn) in files2nodefn:
2100
      if fn is None:
2101
        filenodes = nodeinfo
2102
      else:
2103
        filenodes = filter(fn, nodeinfo)
2104
      nodefiles.update((filename,
2105
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2106
                       for filename in files)
2107

    
2108
    assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)
2109

    
2110
    fileinfo = dict((filename, {}) for filename in nodefiles)
2111
    ignore_nodes = set()
2112

    
2113
    for node in nodeinfo:
2114
      if node.offline:
2115
        ignore_nodes.add(node.name)
2116
        continue
2117

    
2118
      nresult = all_nvinfo[node.name]
2119

    
2120
      if nresult.fail_msg or not nresult.payload:
2121
        node_files = None
2122
      else:
2123
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2124

    
2125
      test = not (node_files and isinstance(node_files, dict))
2126
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2127
              "Node did not return file checksum data")
2128
      if test:
2129
        ignore_nodes.add(node.name)
2130
        continue
2131

    
2132
      # Build per-checksum mapping from filename to nodes having it
2133
      for (filename, checksum) in node_files.items():
2134
        assert filename in nodefiles
2135
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2136

    
2137
    for (filename, checksums) in fileinfo.items():
2138
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2139

    
2140
      # Nodes having the file
2141
      with_file = frozenset(node_name
2142
                            for nodes in fileinfo[filename].values()
2143
                            for node_name in nodes) - ignore_nodes
2144

    
2145
      expected_nodes = nodefiles[filename] - ignore_nodes
2146

    
2147
      # Nodes missing file
2148
      missing_file = expected_nodes - with_file
2149

    
2150
      if filename in files_all_opt:
2151
        # All or no nodes
2152
        errorif(missing_file and missing_file != expected_nodes,
2153
                constants.CV_ECLUSTERFILECHECK, None,
2154
                "File %s is optional, but it must exist on all or no"
2155
                " nodes (not found on %s)",
2156
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2157
      else:
2158
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2159
                "File %s is missing from node(s) %s", filename,
2160
                utils.CommaJoin(utils.NiceSort(missing_file)))
2161

    
2162
        # Warn if a node has a file it shouldn't
2163
        unexpected = with_file - expected_nodes
2164
        errorif(unexpected,
2165
                constants.CV_ECLUSTERFILECHECK, None,
2166
                "File %s should not exist on node(s) %s",
2167
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2168

    
2169
      # See if there are multiple versions of the file
2170
      test = len(checksums) > 1
2171
      if test:
2172
        variants = ["variant %s on %s" %
2173
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2174
                    for (idx, (checksum, nodes)) in
2175
                      enumerate(sorted(checksums.items()))]
2176
      else:
2177
        variants = []
2178

    
2179
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2180
              "File %s found with %s different checksums (%s)",
2181
              filename, len(checksums), "; ".join(variants))
2182

    
2183
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2184
                      drbd_map):
    """Verifies the node DRBD status.

2187
    @type ninfo: L{objects.Node}
2188
    @param ninfo: the node to check
2189
    @param nresult: the remote results for the node
2190
    @param instanceinfo: the dict of instances
2191
    @param drbd_helper: the configured DRBD usermode helper
2192
    @param drbd_map: the DRBD map as returned by
2193
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2194

2195
    """
2196
    node = ninfo.name
2197
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2198

    
2199
    if drbd_helper:
2200
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2201
      test = (helper_result == None)
2202
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2203
               "no drbd usermode helper returned")
2204
      if helper_result:
2205
        status, payload = helper_result
2206
        test = not status
2207
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2208
                 "drbd usermode helper check unsuccessful: %s", payload)
2209
        test = status and (payload != drbd_helper)
2210
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2211
                 "wrong drbd usermode helper: %s", payload)
2212

    
2213
    # compute the DRBD minors
2214
    node_drbd = {}
2215
    for minor, instance in drbd_map[node].items():
2216
      test = instance not in instanceinfo
2217
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2218
               "ghost instance '%s' in temporary DRBD map", instance)
2219
        # ghost instance should not be running, but otherwise we
2220
        # don't give double warnings (both ghost instance and
2221
        # unallocated minor in use)
2222
      if test:
2223
        node_drbd[minor] = (instance, False)
2224
      else:
2225
        instance = instanceinfo[instance]
2226
        node_drbd[minor] = (instance.name, instance.admin_up)
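
    # node_drbd now maps each expected minor to a tuple of
    # (instance name, whether that minor should currently be active).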
2227

    
2228
    # and now check them
2229
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2230
    test = not isinstance(used_minors, (tuple, list))
2231
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2232
             "cannot parse drbd status file: %s", str(used_minors))
2233
    if test:
2234
      # we cannot check drbd status
2235
      return
2236

    
2237
    for minor, (iname, must_exist) in node_drbd.items():
2238
      test = minor not in used_minors and must_exist
2239
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2240
               "drbd minor %d of instance %s is not active", minor, iname)
2241
    for minor in used_minors:
2242
      test = minor not in node_drbd
2243
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2244
               "unallocated drbd minor %d is in use", minor)
2245

    
2246
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2247
    """Builds the node OS structures.
2248

2249
    @type ninfo: L{objects.Node}
2250
    @param ninfo: the node to check
2251
    @param nresult: the remote results for the node
2252
    @param nimg: the node image object
2253

2254
    """
2255
    node = ninfo.name
2256
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2257

    
2258
    remote_os = nresult.get(constants.NV_OSLIST, None)
2259
    test = (not isinstance(remote_os, list) or
2260
            not compat.all(isinstance(v, list) and len(v) == 7
2261
                           for v in remote_os))
2262

    
2263
    _ErrorIf(test, constants.CV_ENODEOS, node,
2264
             "node hasn't returned valid OS data")
2265

    
2266
    nimg.os_fail = test
2267

    
2268
    if test:
2269
      return
2270

    
2271
    os_dict = {}
2272

    
2273
    for (name, os_path, status, diagnose,
2274
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2275

    
2276
      if name not in os_dict:
2277
        os_dict[name] = []
2278

    
2279
      # parameters is a list of lists instead of list of tuples due to
2280
      # JSON lacking a real tuple type, fix it:
2281
      parameters = [tuple(v) for v in parameters]
2282
      os_dict[name].append((os_path, status, diagnose,
2283
                            set(variants), set(parameters), set(api_ver)))
2284

    
2285
    nimg.oslist = os_dict
2286

    
2287
  def _VerifyNodeOS(self, ninfo, nimg, base):
2288
    """Verifies the node OS list.
2289

2290
    @type ninfo: L{objects.Node}
2291
    @param ninfo: the node to check
2292
    @param nimg: the node image object
2293
    @param base: the 'template' node we match against (e.g. from the master)
2294

2295
    """
2296
    node = ninfo.name
2297
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2298

    
2299
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2300

    
2301
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2302
    for os_name, os_data in nimg.oslist.items():
2303
      assert os_data, "Empty OS status for OS %s?!" % os_name
2304
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2305
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2306
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2307
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2308
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2309
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2310
      # comparisons with the 'base' image
2311
      test = os_name not in base.oslist
2312
      _ErrorIf(test, constants.CV_ENODEOS, node,
2313
               "Extra OS %s not present on reference node (%s)",
2314
               os_name, base.name)
2315
      if test:
2316
        continue
2317
      assert base.oslist[os_name], "Base node has empty OS status?"
2318
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2319
      if not b_status:
2320
        # base OS is invalid, skipping
2321
        continue
2322
      for kind, a, b in [("API version", f_api, b_api),
2323
                         ("variants list", f_var, b_var),
2324
                         ("parameters", beautify_params(f_param),
2325
                          beautify_params(b_param))]:
2326
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2327
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2328
                 kind, os_name, base.name,
2329
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2330

    
2331
    # check any missing OSes
2332
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2333
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2334
             "OSes present on reference node %s but missing on this node: %s",
2335
             base.name, utils.CommaJoin(missing))
2336

    
2337
  def _VerifyOob(self, ninfo, nresult):
2338
    """Verifies out of band functionality of a node.
2339

2340
    @type ninfo: L{objects.Node}
2341
    @param ninfo: the node to check
2342
    @param nresult: the remote results for the node
2343

2344
    """
2345
    node = ninfo.name
2346
    # We just have to verify the paths on master and/or master candidates
2347
    # as the oob helper is invoked on the master
2348
    if ((ninfo.master_candidate or ninfo.master_capable) and
2349
        constants.NV_OOB_PATHS in nresult):
2350
      for path_result in nresult[constants.NV_OOB_PATHS]:
2351
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2352

    
2353
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2354
    """Verifies and updates the node volume data.
2355

2356
    This function will update a L{NodeImage}'s internal structures
2357
    with data from the remote call.
2358

2359
    @type ninfo: L{objects.Node}
2360
    @param ninfo: the node to check
2361
    @param nresult: the remote results for the node
2362
    @param nimg: the node image object
2363
    @param vg_name: the configured VG name
2364

2365
    """
2366
    node = ninfo.name
2367
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2368

    
2369
    nimg.lvm_fail = True
2370
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2371
    if vg_name is None:
2372
      pass
2373
    elif isinstance(lvdata, basestring):
2374
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2375
               utils.SafeEncode(lvdata))
2376
    elif not isinstance(lvdata, dict):
2377
      _ErrorIf(True, constants.CV_ENODELVM, node,
2378
               "rpc call to node failed (lvlist)")
2379
    else:
2380
      nimg.volumes = lvdata
2381
      nimg.lvm_fail = False
2382

    
2383
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2384
    """Verifies and updates the node instance list.
2385

2386
    If the listing was successful, then updates this node's instance
2387
    list. Otherwise, it marks the RPC call as failed for the instance
2388
    list key.
2389

2390
    @type ninfo: L{objects.Node}
2391
    @param ninfo: the node to check
2392
    @param nresult: the remote results for the node
2393
    @param nimg: the node image object
2394

2395
    """
2396
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2397
    test = not isinstance(idata, list)
2398
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2399
                  "rpc call to node failed (instancelist): %s",
2400
                  utils.SafeEncode(str(idata)))
2401
    if test:
2402
      nimg.hyp_fail = True
2403
    else:
2404
      nimg.instances = idata
2405

    
2406
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2407
    """Verifies and computes a node information map
2408

2409
    @type ninfo: L{objects.Node}
2410
    @param ninfo: the node to check
2411
    @param nresult: the remote results for the node
2412
    @param nimg: the node image object
2413
    @param vg_name: the configured VG name
2414

2415
    """
2416
    node = ninfo.name
2417
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2418

    
2419
    # try to read free memory (from the hypervisor)
2420
    hv_info = nresult.get(constants.NV_HVINFO, None)
2421
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2422
    _ErrorIf(test, constants.CV_ENODEHV, node,
2423
             "rpc call to node failed (hvinfo)")
2424
    if not test:
2425
      try:
2426
        nimg.mfree = int(hv_info["memory_free"])
2427
      except (ValueError, TypeError):
2428
        _ErrorIf(True, constants.CV_ENODERPC, node,
2429
                 "node returned invalid nodeinfo, check hypervisor")
2430

    
2431
    # FIXME: devise a free space model for file based instances as well
2432
    if vg_name is not None:
2433
      test = (constants.NV_VGLIST not in nresult or
2434
              vg_name not in nresult[constants.NV_VGLIST])
2435
      _ErrorIf(test, constants.CV_ENODELVM, node,
2436
               "node didn't return data for the volume group '%s'"
2437
               " - it is either missing or broken", vg_name)
2438
      if not test:
2439
        try:
2440
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2441
        except (ValueError, TypeError):
2442
          _ErrorIf(True, constants.CV_ENODERPC, node,
2443
                   "node returned invalid LVM info, check LVM status")
2444

    
2445
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2446
    """Gets per-disk status information for all instances.
2447

2448
    @type nodelist: list of strings
2449
    @param nodelist: Node names
2450
    @type node_image: dict of (name, L{objects.Node})
2451
    @param node_image: Node objects
2452
    @type instanceinfo: dict of (name, L{objects.Instance})
2453
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
2456
        keys and disk information as values; the disk information is a
2457
        list of tuples (success, payload)
2458

2459
    """
2460
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2461

    
2462
    node_disks = {}
2463
    node_disks_devonly = {}
2464
    diskless_instances = set()
2465
    diskless = constants.DT_DISKLESS
2466

    
2467
    for nname in nodelist:
2468
      node_instances = list(itertools.chain(node_image[nname].pinst,
2469
                                            node_image[nname].sinst))
2470
      diskless_instances.update(inst for inst in node_instances
2471
                                if instanceinfo[inst].disk_template == diskless)
2472
      disks = [(inst, disk)
2473
               for inst in node_instances
2474
               for disk in instanceinfo[inst].disks]
2475

    
2476
      if not disks:
2477
        # No need to collect data
2478
        continue
2479

    
2480
      node_disks[nname] = disks
2481

    
2482
      # Creating copies as SetDiskID below will modify the objects and that can
2483
      # lead to incorrect data returned from nodes
2484
      devonly = [dev.Copy() for (_, dev) in disks]
2485

    
2486
      for dev in devonly:
2487
        self.cfg.SetDiskID(dev, nname)
2488

    
2489
      node_disks_devonly[nname] = devonly
2490

    
2491
    assert len(node_disks) == len(node_disks_devonly)
2492

    
2493
    # Collect data from all nodes with disks
2494
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2495
                                                          node_disks_devonly)
2496

    
2497
    assert len(result) == len(node_disks)
2498

    
2499
    instdisk = {}
2500

    
2501
    for (nname, nres) in result.items():
2502
      disks = node_disks[nname]
2503

    
2504
      if nres.offline:
2505
        # No data from this node
2506
        data = len(disks) * [(False, "node offline")]
2507
      else:
2508
        msg = nres.fail_msg
2509
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2510
                 "while getting disk information: %s", msg)
2511
        if msg:
2512
          # No data from this node
2513
          data = len(disks) * [(False, msg)]
2514
        else:
2515
          data = []
2516
          for idx, i in enumerate(nres.payload):
2517
            if isinstance(i, (tuple, list)) and len(i) == 2:
2518
              data.append(i)
2519
            else:
2520
              logging.warning("Invalid result from node %s, entry %d: %s",
2521
                              nname, idx, i)
2522
              data.append((False, "Invalid result from the remote node"))
2523

    
2524
      for ((inst, _), status) in zip(disks, data):
2525
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2526

    
2527
    # Add empty entries for diskless instances.
2528
    for inst in diskless_instances:
2529
      assert inst not in instdisk
2530
      instdisk[inst] = {}
2531

    
2532
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2533
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2534
                      compat.all(isinstance(s, (tuple, list)) and
2535
                                 len(s) == 2 for s in statuses)
2536
                      for inst, nnames in instdisk.items()
2537
                      for nname, statuses in nnames.items())
2538
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2539

    
2540
    return instdisk
2541

    
2542
  @staticmethod
2543
  def _SshNodeSelector(group_uuid, all_nodes):
2544
    """Create endless iterators for all potential SSH check hosts.
2545

2546
    """
2547
    nodes = [node for node in all_nodes
2548
             if (node.group != group_uuid and
2549
                 not node.offline)]
2550
    keyfunc = operator.attrgetter("group")
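
    # Group the candidate nodes by node group and build one endless (cycled)
    # iterator of sorted node names per foreign group.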
2551

    
2552
    return map(itertools.cycle,
2553
               [sorted(map(operator.attrgetter("name"), names))
2554
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2555
                                                  keyfunc)])
2556

    
2557
  @classmethod
2558
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2559
    """Choose which nodes should talk to which other nodes.
2560

2561
    We will make nodes contact all nodes in their group, and one node from
2562
    every other group.
2563

2564
    @warning: This algorithm has a known issue if one node group is much
2565
      smaller than others (e.g. just one node). In such a case all other
2566
      nodes will talk to the single node.
2567

2568
    """
2569
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2570
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
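
    # For each online node in this group, draw the next name from every
    # foreign group's cycled iterator, so the checks rotate over those nodes.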
2571

    
2572
    return (online_nodes,
2573
            dict((name, sorted([i.next() for i in sel]))
2574
                 for name in online_nodes))
2575

    
2576
  def BuildHooksEnv(self):
2577
    """Build hooks env.
2578

2579
    Cluster-Verify hooks run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.
2581

2582
    """
2583
    env = {
2584
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2585
      }
2586

    
2587
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2588
               for node in self.my_node_info.values())
2589

    
2590
    return env
2591

    
2592
  def BuildHooksNodes(self):
2593
    """Build hooks nodes.
2594

2595
    """
2596
    return ([], self.my_node_names)
2597

    
2598
  def Exec(self, feedback_fn):
2599
    """Verify integrity of the node group, performing various test on nodes.
2600

2601
    """
2602
    # This method has too many local variables. pylint: disable=R0914
2603
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2604

    
2605
    if not self.my_node_names:
2606
      # empty node group
2607
      feedback_fn("* Empty node group, skipping verification")
2608
      return True
2609

    
2610
    self.bad = False
2611
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2612
    verbose = self.op.verbose
2613
    self._feedback_fn = feedback_fn
2614

    
2615
    vg_name = self.cfg.GetVGName()
2616
    drbd_helper = self.cfg.GetDRBDHelper()
2617
    cluster = self.cfg.GetClusterInfo()
2618
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2619
    hypervisors = cluster.enabled_hypervisors
2620
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2621

    
2622
    i_non_redundant = [] # Non redundant instances
2623
    i_non_a_balanced = [] # Non auto-balanced instances
2624
    n_offline = 0 # Count of offline nodes
2625
    n_drained = 0 # Count of nodes being drained
2626
    node_vol_should = {}
2627

    
2628
    # FIXME: verify OS list
2629

    
2630
    # File verification
2631
    filemap = _ComputeAncillaryFiles(cluster, False)
2632

    
2633
    # do local checksums
2634
    master_node = self.master_node = self.cfg.GetMasterNode()
2635
    master_ip = self.cfg.GetMasterIP()
2636

    
2637
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2638

    
2639
    node_verify_param = {
2640
      constants.NV_FILELIST:
2641
        utils.UniqueSequence(filename
2642
                             for files in filemap
2643
                             for filename in files),
2644
      constants.NV_NODELIST:
2645
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2646
                                  self.all_node_info.values()),
2647
      constants.NV_HYPERVISOR: hypervisors,
2648
      constants.NV_HVPARAMS:
2649
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2650
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2651
                                 for node in node_data_list
2652
                                 if not node.offline],
2653
      constants.NV_INSTANCELIST: hypervisors,
2654
      constants.NV_VERSION: None,
2655
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2656
      constants.NV_NODESETUP: None,
2657
      constants.NV_TIME: None,
2658
      constants.NV_MASTERIP: (master_node, master_ip),
2659
      constants.NV_OSLIST: None,
2660
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2661
      }
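
    # Each NV_* key requests one class of checks from the node-verify RPC;
    # the value carries the parameters for that check (None where not needed).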
2662

    
2663
    if vg_name is not None:
2664
      node_verify_param[constants.NV_VGLIST] = None
2665
      node_verify_param[constants.NV_LVLIST] = vg_name
2666
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2667
      node_verify_param[constants.NV_DRBDLIST] = None
2668

    
2669
    if drbd_helper:
2670
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2671

    
2672
    # bridge checks
2673
    # FIXME: this needs to be changed per node-group, not cluster-wide
2674
    bridges = set()
2675
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2676
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2677
      bridges.add(default_nicpp[constants.NIC_LINK])
2678
    for instance in self.my_inst_info.values():
2679
      for nic in instance.nics:
2680
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2681
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2682
          bridges.add(full_nic[constants.NIC_LINK])
2683

    
2684
    if bridges:
2685
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2686

    
2687
    # Build our expected cluster state
2688
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2689
                                                 name=node.name,
2690
                                                 vm_capable=node.vm_capable))
2691
                      for node in node_data_list)
2692

    
2693
    # Gather OOB paths
2694
    oob_paths = []
2695
    for node in self.all_node_info.values():
2696
      path = _SupportsOob(self.cfg, node)
2697
      if path and path not in oob_paths:
2698
        oob_paths.append(path)
2699

    
2700
    if oob_paths:
2701
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2702

    
2703
    for instance in self.my_inst_names:
2704
      inst_config = self.my_inst_info[instance]
2705

    
2706
      for nname in inst_config.all_nodes:
2707
        if nname not in node_image:
2708
          gnode = self.NodeImage(name=nname)
2709
          gnode.ghost = (nname not in self.all_node_info)
2710
          node_image[nname] = gnode
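          # Instances may reference nodes outside this group; such nodes get
          # a placeholder image, and truly unknown nodes are marked as ghosts.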
2711

    
2712
      inst_config.MapLVsByNode(node_vol_should)
2713

    
2714
      pnode = inst_config.primary_node
2715
      node_image[pnode].pinst.append(instance)
2716

    
2717
      for snode in inst_config.secondary_nodes:
2718
        nimg = node_image[snode]
2719
        nimg.sinst.append(instance)
2720
        if pnode not in nimg.sbp:
2721
          nimg.sbp[pnode] = []
2722
        nimg.sbp[pnode].append(instance)
2723

    
2724
    # At this point, we have the in-memory data structures complete,
2725
    # except for the runtime information, which we'll gather next
2726

    
2727
    # Due to the way our RPC system works, exact response times cannot be
2728
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2729
    # time before and after executing the request, we can at least have a time
2730
    # window.
2731
    nvinfo_starttime = time.time()
2732
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2733
                                           node_verify_param,
2734
                                           self.cfg.GetClusterName())
2735
    nvinfo_endtime = time.time()
2736

    
2737
    if self.extra_lv_nodes and vg_name is not None:
2738
      extra_lv_nvinfo = \
2739
          self.rpc.call_node_verify(self.extra_lv_nodes,
2740
                                    {constants.NV_LVLIST: vg_name},
2741
                                    self.cfg.GetClusterName())
2742
    else:
2743
      extra_lv_nvinfo = {}
2744

    
2745
    all_drbd_map = self.cfg.ComputeDRBDMap()
2746

    
2747
    feedback_fn("* Gathering disk information (%s nodes)" %
2748
                len(self.my_node_names))
2749
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2750
                                     self.my_inst_info)
2751

    
2752
    feedback_fn("* Verifying configuration file consistency")
2753

    
2754
    # If not all nodes are being checked, we need to make sure the master node
2755
    # and a non-checked vm_capable node are in the list.
2756
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2757
    if absent_nodes:
2758
      vf_nvinfo = all_nvinfo.copy()
2759
      vf_node_info = list(self.my_node_info.values())
2760
      additional_nodes = []
2761
      if master_node not in self.my_node_info:
2762
        additional_nodes.append(master_node)
2763
        vf_node_info.append(self.all_node_info[master_node])
2764
      # Add the first vm_capable node we find which is not included
2765
      for node in absent_nodes:
2766
        nodeinfo = self.all_node_info[node]
2767
        if nodeinfo.vm_capable and not nodeinfo.offline:
2768
          additional_nodes.append(node)
2769
          vf_node_info.append(self.all_node_info[node])
2770
          break
2771
      key = constants.NV_FILELIST
2772
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2773
                                                 {key: node_verify_param[key]},
2774
                                                 self.cfg.GetClusterName()))
2775
    else:
2776
      vf_nvinfo = all_nvinfo
2777
      vf_node_info = self.my_node_info.values()
2778

    
2779
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2780

    
2781
    feedback_fn("* Verifying node status")
2782

    
2783
    refos_img = None
2784

    
2785
    for node_i in node_data_list:
2786
      node = node_i.name
2787
      nimg = node_image[node]
2788

    
2789
      if node_i.offline:
2790
        if verbose:
2791
          feedback_fn("* Skipping offline node %s" % (node,))
2792
        n_offline += 1
2793
        continue
2794

    
2795
      if node == master_node:
2796
        ntype = "master"
2797
      elif node_i.master_candidate:
2798
        ntype = "master candidate"
2799
      elif node_i.drained:
2800
        ntype = "drained"
2801
        n_drained += 1
2802
      else:
2803
        ntype = "regular"
2804
      if verbose:
2805
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2806

    
2807
      msg = all_nvinfo[node].fail_msg
2808
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2809
               msg)
2810
      if msg:
2811
        nimg.rpc_fail = True
2812
        continue
2813

    
2814
      nresult = all_nvinfo[node].payload
2815

    
2816
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2817
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2818
      self._VerifyNodeNetwork(node_i, nresult)
2819
      self._VerifyOob(node_i, nresult)
2820

    
2821
      if nimg.vm_capable:
2822
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2823
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2824
                             all_drbd_map)
2825

    
2826
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2827
        self._UpdateNodeInstances(node_i, nresult, nimg)
2828
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2829
        self._UpdateNodeOS(node_i, nresult, nimg)
2830

    
2831
        if not nimg.os_fail:
2832
          if refos_img is None:
2833
            refos_img = nimg
2834
          self._VerifyNodeOS(node_i, nimg, refos_img)
2835
        self._VerifyNodeBridges(node_i, nresult, bridges)
2836

    
        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
2839
        # wrong instances could be from other node groups.)
2840
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2841

    
2842
        for inst in non_primary_inst:
2843
          test = inst in self.all_inst_info
2844
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2845
                   "instance should not run on node %s", node_i.name)
2846
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2847
                   "node is running unknown instance %s", inst)
2848

    
2849
    for node, result in extra_lv_nvinfo.items():
2850
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2851
                              node_image[node], vg_name)
2852

    
2853
    feedback_fn("* Verifying instance status")
2854
    for instance in self.my_inst_names:
2855
      if verbose:
2856
        feedback_fn("* Verifying instance %s" % instance)
2857
      inst_config = self.my_inst_info[instance]
2858
      self._VerifyInstance(instance, inst_config, node_image,
2859
                           instdisk[instance])
2860
      inst_nodes_offline = []
2861

    
2862
      pnode = inst_config.primary_node
2863
      pnode_img = node_image[pnode]
2864
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2865
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2866
               " primary node failed", instance)
2867

    
2868
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2869
               constants.CV_EINSTANCEBADNODE, instance,
2870
               "instance is marked as running and lives on offline node %s",
2871
               inst_config.primary_node)
2872

    
2873
      # If the instance is non-redundant we cannot survive losing its primary
2874
      # node, so we are not N+1 compliant. On the other hand we have no disk
2875
      # templates with more than one secondary so that situation is not well
2876
      # supported either.
2877
      # FIXME: does not support file-backed instances
2878
      if not inst_config.secondary_nodes:
2879
        i_non_redundant.append(instance)
2880

    
2881
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2882
               constants.CV_EINSTANCELAYOUT,
2883
               instance, "instance has multiple secondary nodes: %s",
2884
               utils.CommaJoin(inst_config.secondary_nodes),
2885
               code=self.ETYPE_WARNING)
2886

    
2887
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2888
        pnode = inst_config.primary_node
2889
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2890
        instance_groups = {}
2891

    
2892
        for node in instance_nodes:
2893
          instance_groups.setdefault(self.all_node_info[node].group,
2894
                                     []).append(node)
2895

    
2896
        pretty_list = [
2897
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2898
          # Sort so that we always list the primary node first.
2899
          for group, nodes in sorted(instance_groups.items(),
2900
                                     key=lambda (_, nodes): pnode in nodes,
2901
                                     reverse=True)]
2902

    
2903
        self._ErrorIf(len(instance_groups) > 1,
2904
                      constants.CV_EINSTANCESPLITGROUPS,
2905
                      instance, "instance has primary and secondary nodes in"
2906
                      " different groups: %s", utils.CommaJoin(pretty_list),
2907
                      code=self.ETYPE_WARNING)
2908

    
2909
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2910
        i_non_a_balanced.append(instance)
2911

    
2912
      for snode in inst_config.secondary_nodes:
2913
        s_img = node_image[snode]
2914
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2915
                 snode, "instance %s, connection to secondary node failed",
2916
                 instance)
2917

    
2918
        if s_img.offline:
2919
          inst_nodes_offline.append(snode)
2920

    
2921
      # warn that the instance lives on offline nodes
2922
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2923
               "instance has offline secondary node(s) %s",
2924
               utils.CommaJoin(inst_nodes_offline))
2925
      # ... or ghost/non-vm_capable nodes
2926
      for node in inst_config.all_nodes:
2927
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2928
                 instance, "instance lives on ghost node %s", node)
2929
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2930
                 instance, "instance lives on non-vm_capable node %s", node)
2931

    
2932
    feedback_fn("* Verifying orphan volumes")
2933
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2934

    
2935
    # We will get spurious "unknown volume" warnings if any node of this group
2936
    # is secondary for an instance whose primary is in another group. To avoid
2937
    # them, we find these instances and add their volumes to node_vol_should.
2938
    for inst in self.all_inst_info.values():
2939
      for secondary in inst.secondary_nodes:
2940
        if (secondary in self.my_node_info
2941
            and inst.name not in self.my_inst_info):
2942
          inst.MapLVsByNode(node_vol_should)
2943
          break
2944

    
2945
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2946

    
2947
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2948
      feedback_fn("* Verifying N+1 Memory redundancy")
2949
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2950

    
2951
    feedback_fn("* Other Notes")
2952
    if i_non_redundant:
2953
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2954
                  % len(i_non_redundant))
2955

    
2956
    if i_non_a_balanced:
2957
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2958
                  % len(i_non_a_balanced))
2959

    
2960
    if n_offline:
2961
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2962

    
2963
    if n_drained:
2964
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2965

    
2966
    return not self.bad
2967

    
2968
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2969
    """Analyze the post-hooks' result
2970

2971
    This method analyses the hook result, handles it, and sends some
2972
    nicely-formatted feedback back to the user.
2973

2974
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2975
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2976
    @param hooks_results: the results of the multi-node hooks rpc call
2977
    @param feedback_fn: function used to send feedback back to the caller
2978
    @param lu_result: previous Exec result
2979
    @return: the new Exec result, based on the previous result
2980
        and hook results
2981

2982
    """
2983
    # We only really run POST phase hooks, only for non-empty groups,
2984
    # and are only interested in their results
2985
    if not self.my_node_names:
2986
      # empty node group
2987
      pass
2988
    elif phase == constants.HOOKS_PHASE_POST:
2989
      # Used to change hooks' output to proper indentation
2990
      feedback_fn("* Hooks Results")
2991
      assert hooks_results, "invalid result from hooks"
2992

    
2993
      for node_name in hooks_results:
2994
        res = hooks_results[node_name]
2995
        msg = res.fail_msg
2996
        test = msg and not res.offline
2997
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
2998
                      "Communication failure in hooks execution: %s", msg)
2999
        if res.offline or msg:
3000
          # No need to investigate payload if node is offline or gave
3001
          # an error.
3002
          continue
3003
        for script, hkr, output in res.payload:
3004
          test = hkr == constants.HKR_FAIL
3005
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3006
                        "Script %s failed, output:", script)
3007
          if test:
3008
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3009
            feedback_fn("%s" % output)
3010
            lu_result = False
3011

    
3012
    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
3036
  """Verifies the status of all disks in a node group.
3037

3038
  """
3039
  REQ_BGL = False
3040

    
3041
  def ExpandNames(self):
3042
    # Raises errors.OpPrereqError on its own if group can't be found
3043
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3044

    
3045
    self.share_locks = _ShareAll()
3046
    self.needed_locks = {
3047
      locking.LEVEL_INSTANCE: [],
3048
      locking.LEVEL_NODEGROUP: [],
3049
      locking.LEVEL_NODE: [],
3050
      }
3051

    
3052
  def DeclareLocks(self, level):
3053
    if level == locking.LEVEL_INSTANCE:
3054
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3055

    
3056
      # Lock instances optimistically, needs verification once node and group
3057
      # locks have been acquired
3058
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3059
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3060

    
3061
    elif level == locking.LEVEL_NODEGROUP:
3062
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3063

    
3064
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3065
        set([self.group_uuid] +
3066
            # Lock all groups used by instances optimistically; this requires
3067
            # going via the node before it's locked, requiring verification
3068
            # later on
3069
            [group_uuid
3070
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3071
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3072

    
3073
    elif level == locking.LEVEL_NODE:
3074
      # This will only lock the nodes in the group to be verified which contain
3075
      # actual instances
3076
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3077
      self._LockInstancesNodes()
3078

    
3079
      # Lock all nodes in group to be verified
3080
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3081
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3082
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3083

    
3084
  def CheckPrereq(self):
3085
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3086
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3087
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3088

    
3089
    assert self.group_uuid in owned_groups
3090

    
3091
    # Check if locked instances are still correct
3092
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3093

    
3094
    # Get instance information
3095
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3096

    
3097
    # Check if node groups for locked instances are still correct
3098
    for (instance_name, inst) in self.instances.items():
3099
      assert owned_nodes.issuperset(inst.all_nodes), \
3100
        "Instance %s's nodes changed while we kept the lock" % instance_name
3101

    
3102
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3103
                                             owned_groups)
3104

    
3105
      assert self.group_uuid in inst_groups, \
3106
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3107

    
3108
  def Exec(self, feedback_fn):
3109
    """Verify integrity of cluster disks.
3110

3111
    @rtype: tuple of three items
3112
    @return: a tuple of (dict of node-to-node_error, list of instances
3113
        which need activate-disks, dict of instance: (node, volume) for
3114
        missing volumes)
3115

3116
    """
3117
    res_nodes = {}
3118
    res_instances = set()
3119
    res_missing = {}
3120

    
3121
    nv_dict = _MapInstanceDisksToNodes([inst
3122
                                        for inst in self.instances.values()
3123
                                        if inst.admin_up])
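    # nv_dict maps (node name, LV name) -> instance; any entry still present
    # after the per-node LV lists are processed below is a missing volume.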
3124

    
3125
    if nv_dict:
3126
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3127
                             set(self.cfg.GetVmCapableNodeList()))
3128

    
3129
      node_lvs = self.rpc.call_lv_list(nodes, [])
3130

    
3131
      for (node, node_res) in node_lvs.items():
3132
        if node_res.offline:
3133
          continue
3134

    
3135
        msg = node_res.fail_msg
3136
        if msg:
3137
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3138
          res_nodes[node] = msg
3139
          continue
3140

    
3141
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3142
          inst = nv_dict.pop((node, lv_name), None)
3143
          if not (lv_online or inst is None):
3144
            res_instances.add(inst)
3145

    
3146
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3147
      # better
3148
      for key, inst in nv_dict.iteritems():
3149
        res_missing.setdefault(inst, []).append(key)
3150

    
3151
    return (res_nodes, list(res_instances), res_missing)
3152

    
3153

    
3154
class LUClusterRepairDiskSizes(NoHooksLU):
3155
  """Verifies the cluster disks sizes.
3156

3157
  """
3158
  REQ_BGL = False
3159

    
3160
  def ExpandNames(self):
3161
    if self.op.instances:
3162
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3163
      self.needed_locks = {
3164
        locking.LEVEL_NODE: [],
3165
        locking.LEVEL_INSTANCE: self.wanted_names,
3166
        }
3167
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3168
    else:
3169
      self.wanted_names = None
3170
      self.needed_locks = {
3171
        locking.LEVEL_NODE: locking.ALL_SET,
3172
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3173
        }
3174
    self.share_locks = _ShareAll()
3175

    
3176
  def DeclareLocks(self, level):
3177
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
3178
      self._LockInstancesNodes(primary_only=True)
3179

    
3180
  def CheckPrereq(self):
3181
    """Check prerequisites.
3182

3183
    This only checks the optional instance list against the existing names.
3184

3185
    """
3186
    if self.wanted_names is None:
3187
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3188

    
3189
    self.wanted_instances = \
3190
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3191

    
3192
  def _EnsureChildSizes(self, disk):
3193
    """Ensure children of the disk have the needed disk size.
3194

3195
    This is valid mainly for DRBD8 and fixes an issue where the
3196
    children have smaller disk size.
3197

3198
    @param disk: an L{ganeti.objects.Disk} object
3199

3200
    """
3201
    if disk.dev_type == constants.LD_DRBD8:
3202
      assert disk.children, "Empty children for DRBD8?"
3203
      fchild = disk.children[0]
3204
      mismatch = fchild.size < disk.size
3205
      if mismatch:
3206
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3207
                     fchild.size, disk.size)
3208
        fchild.size = disk.size
3209

    
3210
      # and we recurse on this child only, not on the metadev
3211
      return self._EnsureChildSizes(fchild) or mismatch
3212
    else:
3213
      return False
3214

    
3215
  def Exec(self, feedback_fn):
3216
    """Verify the size of cluster disks.
3217

3218
    """
3219
    # TODO: check child disks too
3220
    # TODO: check differences in size between primary/secondary nodes
3221
    per_node_disks = {}
3222
    for instance in self.wanted_instances:
3223
      pnode = instance.primary_node
3224
      if pnode not in per_node_disks:
3225
        per_node_disks[pnode] = []
3226
      for idx, disk in enumerate(instance.disks):
3227
        per_node_disks[pnode].append((instance, idx, disk))
3228

    
3229
    changed = []
3230
    for node, dskl in per_node_disks.items():
3231
      newl = [v[2].Copy() for v in dskl]
3232
      for dsk in newl:
3233
        self.cfg.SetDiskID(dsk, node)
3234
      result = self.rpc.call_blockdev_getsize(node, newl)
3235
      if result.fail_msg:
3236
        self.LogWarning("Failure in blockdev_getsize call to node"
3237
                        " %s, ignoring", node)
3238
        continue
3239
      if len(result.payload) != len(dskl):
3240
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3241
                        " result.payload=%s", node, len(dskl), result.payload)
3242
        self.LogWarning("Invalid result from node %s, ignoring node results",
3243
                        node)
3244
        continue
3245
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3246
        if size is None:
3247
          self.LogWarning("Disk %d of instance %s did not return size"
3248
                          " information, ignoring", idx, instance.name)
3249
          continue
3250
        if not isinstance(size, (int, long)):
3251
          self.LogWarning("Disk %d of instance %s did not return valid"
3252
                          " size information, ignoring", idx, instance.name)
3253
          continue
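        # Assumption inferred from the conversion below: the RPC reports the
        # size in bytes while disk.size is kept in MiB, hence the shift by 20
        # bits (divide by 2**20) before comparing.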
3254
        size = size >> 20
3255
        if size != disk.size:
3256
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3257
                       " correcting: recorded %d, actual %d", idx,
3258
                       instance.name, disk.size, size)
3259
          disk.size = size
3260
          self.cfg.Update(instance, feedback_fn)
3261
          changed.append((instance.name, idx, size))
3262
        if self._EnsureChildSizes(disk):
3263
          self.cfg.Update(instance, feedback_fn)
3264
          changed.append((instance.name, idx, disk.size))
3265
    return changed
3266

    
3267

    
3268
class LUClusterRename(LogicalUnit):
3269
  """Rename the cluster.
3270

3271
  """
3272
  HPATH = "cluster-rename"
3273
  HTYPE = constants.HTYPE_CLUSTER
3274

    
3275
  def BuildHooksEnv(self):
3276
    """Build hooks env.
3277

3278
    """
3279
    return {
3280
      "OP_TARGET": self.cfg.GetClusterName(),
3281
      "NEW_NAME": self.op.name,
3282
      }
3283

    
3284
  def BuildHooksNodes(self):
3285
    """Build hooks nodes.
3286

3287
    """
3288
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3289

    
3290
  def CheckPrereq(self):
3291
    """Verify that the passed name is a valid one.
3292

3293
    """
3294
    hostname = netutils.GetHostname(name=self.op.name,
3295
                                    family=self.cfg.GetPrimaryIPFamily())
3296

    
3297
    new_name = hostname.name
3298
    self.ip = new_ip = hostname.ip
3299
    old_name = self.cfg.GetClusterName()
3300
    old_ip = self.cfg.GetMasterIP()
3301
    if new_name == old_name and new_ip == old_ip:
3302
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3303
                                 " cluster has changed",
3304
                                 errors.ECODE_INVAL)
3305
    if new_ip != old_ip:
3306
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3307
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3308
                                   " reachable on the network" %
3309
                                   new_ip, errors.ECODE_NOTUNIQUE)
3310

    
3311
    self.op.name = new_name
3312

    
3313
  def Exec(self, feedback_fn):
3314
    """Rename the cluster.
3315

3316
    """
3317
    clustername = self.op.name
3318
    ip = self.ip
3319

    
3320
    # shutdown the master IP
3321
    master = self.cfg.GetMasterNode()
3322
    result = self.rpc.call_node_deactivate_master_ip(master)
3323
    result.Raise("Could not disable the master role")
3324

    
3325
    try:
3326
      cluster = self.cfg.GetClusterInfo()
3327
      cluster.cluster_name = clustername
3328
      cluster.master_ip = ip
3329
      self.cfg.Update(cluster, feedback_fn)
3330

    
3331
      # update the known hosts file
3332
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3333
      node_list = self.cfg.GetOnlineNodeList()
3334
      try:
3335
        node_list.remove(master)
3336
      except ValueError:
3337
        pass
3338
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3339
    finally:
3340
      result = self.rpc.call_node_activate_master_ip(master)
3341
      msg = result.fail_msg
3342
      if msg:
3343
        self.LogWarning("Could not re-enable the master role on"
3344
                        " the master, please restart manually: %s", msg)
3345

    
3346
    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


class LUClusterSetParams(LogicalUnit):
3371
  """Change the parameters of the cluster.
3372

3373
  """
3374
  HPATH = "cluster-modify"
3375
  HTYPE = constants.HTYPE_CLUSTER
3376
  REQ_BGL = False
3377

    
3378
  def CheckArguments(self):
3379
    """Check parameters
3380

3381
    """
3382
    if self.op.uid_pool:
3383
      uidpool.CheckUidPool(self.op.uid_pool)
3384

    
3385
    if self.op.add_uids:
3386
      uidpool.CheckUidPool(self.op.add_uids)
3387

    
3388
    if self.op.remove_uids:
3389
      uidpool.CheckUidPool(self.op.remove_uids)
3390

    
3391
    if self.op.master_netmask is not None:
3392
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3393

    
3394
  def ExpandNames(self):
3395
    # FIXME: in the future maybe other cluster params won't require checking on
3396
    # all nodes to be modified.
3397
    self.needed_locks = {
3398
      locking.LEVEL_NODE: locking.ALL_SET,
3399
    }
3400
    self.share_locks[locking.LEVEL_NODE] = 1
3401

    
3402
  def BuildHooksEnv(self):
3403
    """Build hooks env.
3404

3405
    """
3406
    return {
3407
      "OP_TARGET": self.cfg.GetClusterName(),
3408
      "NEW_VG_NAME": self.op.vg_name,
3409
      }
3410

    
3411
  def BuildHooksNodes(self):
3412
    """Build hooks nodes.
3413

3414
    """
3415
    mn = self.cfg.GetMasterNode()
3416
    return ([mn], [mn])
3417

    
3418
  def CheckPrereq(self):
3419
    """Check prerequisites.
3420

3421
    This checks whether the given params don't conflict and
3422
    if the given volume group is valid.
3423

3424
    """
3425
    if self.op.vg_name is not None and not self.op.vg_name:
3426
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3427
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3428
                                   " instances exist", errors.ECODE_INVAL)
3429

    
3430
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3431
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3432
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3433
                                   " drbd-based instances exist",
3434
                                   errors.ECODE_INVAL)
3435

    
3436
    node_list = self.owned_locks(locking.LEVEL_NODE)
3437

    
3438
    # if vg_name not None, checks given volume group on all nodes
3439
    if self.op.vg_name:
3440
      vglist = self.rpc.call_vg_list(node_list)
3441
      for node in node_list:
3442
        msg = vglist[node].fail_msg
3443
        if msg:
3444
          # ignoring down node
3445
          self.LogWarning("Error while gathering data on node %s"
3446
                          " (ignoring node): %s", node, msg)
3447
          continue
3448
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3449
                                              self.op.vg_name,
3450
                                              constants.MIN_VG_SIZE)
3451
        if vgstatus:
3452
          raise errors.OpPrereqError("Error on node '%s': %s" %
3453
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3454

    
3455
    if self.op.drbd_helper:
3456
      # checks given drbd helper on all nodes
3457
      helpers = self.rpc.call_drbd_helper(node_list)
3458
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3459
        if ninfo.offline:
3460
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3461
          continue
3462
        msg = helpers[node].fail_msg
3463
        if msg:
3464
          raise errors.OpPrereqError("Error checking drbd helper on node"
3465
                                     " '%s': %s" % (node, msg),
3466
                                     errors.ECODE_ENVIRON)
3467
        node_helper = helpers[node].payload
3468
        if node_helper != self.op.drbd_helper:
3469
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3470
                                     (node, node_helper), errors.ECODE_ENVIRON)
3471

    
3472
    self.cluster = cluster = self.cfg.GetClusterInfo()
3473
    # validate params changes
3474
    if self.op.beparams:
3475
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3476
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3477

    
3478
    if self.op.ndparams:
3479
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3480
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3481

    
3482
      # TODO: we need a more general way to handle resetting
3483
      # cluster-level parameters to default values
3484
      if self.new_ndparams["oob_program"] == "":
3485
        self.new_ndparams["oob_program"] = \
3486
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3487

    
3488
    if self.op.nicparams:
3489
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3490
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3491
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3492
      nic_errors = []
3493

    
3494
      # check all instances for consistency
3495
      for instance in self.cfg.GetAllInstancesInfo().values():
3496
        for nic_idx, nic in enumerate(instance.nics):
3497
          params_copy = copy.deepcopy(nic.nicparams)
3498
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3499

    
3500
          # check parameter syntax
3501
          try:
3502
            objects.NIC.CheckParameterSyntax(params_filled)
3503
          except errors.ConfigurationError, err:
3504
            nic_errors.append("Instance %s, nic/%d: %s" %
3505
                              (instance.name, nic_idx, err))
3506

    
3507
          # if we're moving instances to routed, check that they have an ip
3508
          target_mode = params_filled[constants.NIC_MODE]
3509
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3510
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3511
                              " address" % (instance.name, nic_idx))
3512
      if nic_errors:
3513
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3514
                                   "\n".join(nic_errors))
3515

    
3516
    # hypervisor list/parameters
3517
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3518
    if self.op.hvparams:
3519
      for hv_name, hv_dict in self.op.hvparams.items():
3520
        if hv_name not in self.new_hvparams:
3521
          self.new_hvparams[hv_name] = hv_dict
3522
        else:
3523
          self.new_hvparams[hv_name].update(hv_dict)
3524

    
3525
    # os hypervisor parameters
3526
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3527
    if self.op.os_hvp:
3528
      for os_name, hvs in self.op.os_hvp.items():
3529
        if os_name not in self.new_os_hvp:
3530
          self.new_os_hvp[os_name] = hvs
3531
        else:
3532
          for hv_name, hv_dict in hvs.items():
3533
            if hv_name not in self.new_os_hvp[os_name]:
3534
              self.new_os_hvp[os_name][hv_name] = hv_dict
3535
            else:
3536
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3537

    
3538
    # os parameters
3539
    self.new_osp = objects.FillDict(cluster.osparams, {})
3540
    if self.op.osparams:
3541
      for os_name, osp in self.op.osparams.items():
3542
        if os_name not in self.new_osp:
3543
          self.new_osp[os_name] = {}
3544

    
3545
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3546
                                                  use_none=True)
3547

    
3548
        if not self.new_osp[os_name]:
3549
          # we removed all parameters
3550
          del self.new_osp[os_name]
3551
        else:
3552
          # check the parameter validity (remote check)
3553
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3554
                         os_name, self.new_osp[os_name])
3555

    
3556
    # changes to the hypervisor list
3557
    if self.op.enabled_hypervisors is not None:
3558
      self.hv_list = self.op.enabled_hypervisors
3559
      for hv in self.hv_list:
3560
        # if the hypervisor doesn't already exist in the cluster
3561
        # hvparams, we initialize it to empty, and then (in both
3562
        # cases) we make sure to fill the defaults, as we might not
3563
        # have a complete defaults list if the hypervisor wasn't
3564
        # enabled before
3565
        if hv not in new_hvp:
3566
          new_hvp[hv] = {}
3567
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3568
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3569
    else:
3570
      self.hv_list = cluster.enabled_hypervisors
3571

    
3572
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3573
      # either the enabled list has changed, or the parameters have, validate
3574
      for hv_name, hv_params in self.new_hvparams.items():
3575
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3576
            (self.op.enabled_hypervisors and
3577
             hv_name in self.op.enabled_hypervisors)):
3578
          # either this is a new hypervisor, or its parameters have changed
3579
          hv_class = hypervisor.GetHypervisor(hv_name)
3580
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3581
          hv_class.CheckParameterSyntax(hv_params)
3582
          _CheckHVParams(self, node_list, hv_name, hv_params)
3583

    
3584
    if self.op.os_hvp:
3585
      # no need to check any newly-enabled hypervisors, since the
3586
      # defaults have already been checked in the above code-block
3587
      for os_name, os_hvp in self.new_os_hvp.items():
3588
        for hv_name, hv_params in os_hvp.items():
3589
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3590
          # we need to fill in the new os_hvp on top of the actual hv_p
3591
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3592
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3593
          hv_class = hypervisor.GetHypervisor(hv_name)
3594
          hv_class.CheckParameterSyntax(new_osp)
3595
          _CheckHVParams(self, node_list, hv_name, new_osp)
3596

    
3597
    if self.op.default_iallocator:
3598
      alloc_script = utils.FindFile(self.op.default_iallocator,
3599
                                    constants.IALLOCATOR_SEARCH_PATH,
3600
                                    os.path.isfile)
3601
      if alloc_script is None:
3602
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3603
                                   " specified" % self.op.default_iallocator,
3604
                                   errors.ECODE_INVAL)
3605

    
3606
  def Exec(self, feedback_fn):
3607
    """Change the parameters of the cluster.
3608

3609
    """
3610
    if self.op.vg_name is not None:
3611
      new_volume = self.op.vg_name
3612
      if not new_volume:
3613
        new_volume = None
3614
      if new_volume != self.cfg.GetVGName():
3615
        self.cfg.SetVGName(new_volume)
3616
      else:
3617
        feedback_fn("Cluster LVM configuration already in desired"
3618
                    " state, not changing")
3619
    if self.op.drbd_helper is not None:
3620
      new_helper = self.op.drbd_helper
3621
      if not new_helper:
3622
        new_helper = None
3623
      if new_helper != self.cfg.GetDRBDHelper():
3624
        self.cfg.SetDRBDHelper(new_helper)
3625
      else:
3626
        feedback_fn("Cluster DRBD helper already in desired state,"
3627
                    " not changing")
3628
    if self.op.hvparams:
3629
      self.cluster.hvparams = self.new_hvparams
3630
    if self.op.os_hvp:
3631
      self.cluster.os_hvp = self.new_os_hvp
3632
    if self.op.enabled_hypervisors is not None:
3633
      self.cluster.hvparams = self.new_hvparams
3634
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3635
    if self.op.beparams:
3636
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3637
    if self.op.nicparams:
3638
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3639
    if self.op.osparams:
3640
      self.cluster.osparams = self.new_osp
3641
    if self.op.ndparams:
3642
      self.cluster.ndparams = self.new_ndparams
3643

    
3644
    if self.op.candidate_pool_size is not None:
3645
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3646
      # we need to update the pool size here, otherwise the save will fail
3647
      _AdjustCandidatePool(self, [])
3648

    
3649
    if self.op.maintain_node_health is not None:
3650
      self.cluster.maintain_node_health = self.op.maintain_node_health
3651

    
3652
    if self.op.prealloc_wipe_disks is not None:
3653
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3654

    
3655
    if self.op.add_uids is not None:
3656
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3657

    
3658
    if self.op.remove_uids is not None:
3659
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3660

    
3661
    if self.op.uid_pool is not None:
3662
      self.cluster.uid_pool = self.op.uid_pool
3663

    
3664
    if self.op.default_iallocator is not None:
3665
      self.cluster.default_iallocator = self.op.default_iallocator
3666

    
3667
    if self.op.reserved_lvs is not None:
3668
      self.cluster.reserved_lvs = self.op.reserved_lvs
3669

    
3670
    def helper_os(aname, mods, desc):
3671
      desc += " OS list"
3672
      lst = getattr(self.cluster, aname)
3673
      for key, val in mods:
3674
        if key == constants.DDM_ADD:
3675
          if val in lst:
3676
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3677
          else:
3678
            lst.append(val)
3679
        elif key == constants.DDM_REMOVE:
3680
          if val in lst:
3681
            lst.remove(val)
3682
          else:
3683
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3684
        else:
3685
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3686

    
3687
    if self.op.hidden_os:
3688
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3689

    
3690
    if self.op.blacklisted_os:
3691
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3692

    
3693
    if self.op.master_netdev:
3694
      master = self.cfg.GetMasterNode()
3695
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3696
                  self.cluster.master_netdev)
3697
      result = self.rpc.call_node_deactivate_master_ip(master)
3698
      result.Raise("Could not disable the master ip")
3699
      feedback_fn("Changing master_netdev from %s to %s" %
3700
                  (self.cluster.master_netdev, self.op.master_netdev))
3701
      self.cluster.master_netdev = self.op.master_netdev
3702

    
3703
    if self.op.master_netmask:
3704
      master = self.cfg.GetMasterNode()
3705
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3706
      result = self.rpc.call_node_change_master_netmask(master,
3707
                                                        self.op.master_netmask)
3708
      if result.fail_msg:
3709
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3710
        self.LogWarning(msg)
3711
        feedback_fn(msg)
3712
      else:
3713
        self.cluster.master_netmask = self.op.master_netmask
3714

    
3715
    self.cfg.Update(self.cluster, feedback_fn)
3716

    
3717
    if self.op.master_netdev:
3718
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3719
                  self.op.master_netdev)
3720
      result = self.rpc.call_node_activate_master_ip(master)
3721
      if result.fail_msg:
3722
        self.LogWarning("Could not re-enable the master ip on"
3723
                        " the master, please restart manually: %s",
3724
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
3742
  """Compute files external to Ganeti which need to be consistent.
3743

3744
  @type redist: boolean
3745
  @param redist: Whether to include files which need to be redistributed
3746

3747
  """
3748
  # Compute files for all nodes
3749
  files_all = set([
3750
    constants.SSH_KNOWN_HOSTS_FILE,
3751
    constants.CONFD_HMAC_KEY,
3752
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3753
    ])
3754

    
3755
  if not redist:
3756
    files_all.update(constants.ALL_CERT_FILES)
3757
    files_all.update(ssconf.SimpleStore().GetFileList())
3758
  else:
3759
    # we need to ship at least the RAPI certificate
3760
    files_all.add(constants.RAPI_CERT_FILE)
3761

    
3762
  if cluster.modify_etc_hosts:
3763
    files_all.add(constants.ETC_HOSTS)
3764

    
3765
  # Files which must either exist on all nodes or on none
3766
  files_all_opt = set([
3767
    constants.RAPI_USERS_FILE,
3768
    ])
3769

    
3770
  # Files which should only be on master candidates
3771
  files_mc = set()
3772
  if not redist:
3773
    files_mc.add(constants.CLUSTER_CONF_FILE)
3774

    
3775
  # Files which should only be on VM-capable nodes
3776
  files_vm = set(filename
3777
    for hv_name in cluster.enabled_hypervisors
3778
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3779

    
3780
  # Filenames must be unique
3781
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3782
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3783
         "Found file listed in more than one file list"
3784

    
3785
  return (files_all, files_all_opt, files_mc, files_vm)
3786

    
3787

    
3788
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3789
  """Distribute additional files which are part of the cluster configuration.
3790

3791
  ConfigWriter takes care of distributing the config and ssconf files, but
3792
  there are more files which should be distributed to all nodes. This function
3793
  makes sure those are copied.
3794

3795
  @param lu: calling logical unit
3796
  @param additional_nodes: list of nodes not in the config to distribute to
3797
  @type additional_vm: boolean
3798
  @param additional_vm: whether the additional nodes are vm-capable or not
3799

3800
  """
3801
  # Gather target nodes
3802
  cluster = lu.cfg.GetClusterInfo()
3803
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3804

    
3805
  online_nodes = lu.cfg.GetOnlineNodeList()
3806
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3807

    
3808
  if additional_nodes is not None:
3809
    online_nodes.extend(additional_nodes)
3810
    if additional_vm:
3811
      vm_nodes.extend(additional_nodes)
3812

    
3813
  # Never distribute to master node
3814
  for nodelist in [online_nodes, vm_nodes]:
3815
    if master_info.name in nodelist:
3816
      nodelist.remove(master_info.name)
3817

    
3818
  # Gather file lists
3819
  (files_all, files_all_opt, files_mc, files_vm) = \
3820
    _ComputeAncillaryFiles(cluster, True)
3821

    
3822
  # Never re-distribute configuration file from here
3823
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3824
              constants.CLUSTER_CONF_FILE in files_vm)
3825
  assert not files_mc, "Master candidates not handled in this function"
3826

    
3827
  filemap = [
3828
    (online_nodes, files_all),
3829
    (online_nodes, files_all_opt),
3830
    (vm_nodes, files_vm),
3831
    ]
3832

    
3833
  # Upload the files
3834
  for (node_list, files) in filemap:
3835
    for fname in files:
3836
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_activate_master_ip(master)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_deactivate_master_ip(master)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
3886
  """Sleep and poll for an instance's disk to sync.
3887

3888
  """
3889
  if not instance.disks or disks is not None and not disks:
3890
    return True
3891

    
3892
  disks = _ExpandCheckDisks(instance, disks)
3893

    
3894
  if not oneshot:
3895
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3896

    
3897
  node = instance.primary_node
3898

    
3899
  for dev in disks:
3900
    lu.cfg.SetDiskID(dev, node)
3901

    
3902
  # TODO: Convert to utils.Retry
3903

    
3904
  retries = 0
3905
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3906
  while True:
3907
    max_time = 0
3908
    done = True
3909
    cumul_degraded = False
3910
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3911
    msg = rstats.fail_msg
3912
    if msg:
3913
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3914
      retries += 1
3915
      if retries >= 10:
3916
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3917
                                 " aborting." % node)
3918
      time.sleep(6)
3919
      continue
3920
    rstats = rstats.payload
3921
    retries = 0
3922
    for i, mstat in enumerate(rstats):
3923
      if mstat is None:
3924
        lu.LogWarning("Can't compute data for node %s/%s",
3925
                           node, disks[i].iv_name)
3926
        continue
3927

    
3928
      cumul_degraded = (cumul_degraded or
3929
                        (mstat.is_degraded and mstat.sync_percent is None))
3930
      if mstat.sync_percent is not None:
3931
        done = False
3932
        if mstat.estimated_time is not None:
3933
          rem_time = ("%s remaining (estimated)" %
3934
                      utils.FormatSeconds(mstat.estimated_time))
3935
          max_time = mstat.estimated_time
3936
        else:
3937
          rem_time = "no time estimate"
3938
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3939
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3940

    
3941
    # if we're done but degraded, let's do a few small retries, to
3942
    # make sure we see a stable and not transient situation; therefore
3943
    # we force restart of the loop
3944
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3945
      logging.info("Degraded disks found, %d retries left", degr_retries)
3946
      degr_retries -= 1
3947
      time.sleep(1)
3948
      continue
3949

    
3950
    if done or oneshot:
3951
      break
3952

    
3953
    time.sleep(min(60, max_time))
3954

    
3955
  if done:
3956
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3957
  return not cumul_degraded
3958

    
3959

    
3960
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3961
  """Check that mirrors are not degraded.
3962

3963
  The ldisk parameter, if True, will change the test from the
3964
  is_degraded attribute (which represents overall non-ok status for
3965
  the device(s)) to the ldisk (representing the local storage status).
3966

3967
  """
3968
  lu.cfg.SetDiskID(dev, node)
3969

    
3970
  result = True
3971

    
3972
  if on_primary or dev.AssembleOnSecondary():
3973
    rstats = lu.rpc.call_blockdev_find(node, dev)
3974
    msg = rstats.fail_msg
3975
    if msg:
3976
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3977
      result = False
3978
    elif not rstats.payload:
3979
      lu.LogWarning("Can't find disk on node %s", node)
3980
      result = False
3981
    else:
3982
      if ldisk:
3983
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3984
      else:
3985
        result = result and not rstats.payload.is_degraded
3986

    
3987
  if dev.children:
3988
    for child in dev.children:
3989
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3990

    
3991
  return result
3992

    
3993

    
3994
class LUOobCommand(NoHooksLU):
3995
  """Logical unit for OOB handling.
3996

3997
  """
3998
  REG_BGL = False
3999
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4000

    
4001
  def ExpandNames(self):
4002
    """Gather locks we need.
4003

4004
    """
4005
    if self.op.node_names:
4006
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4007
      lock_names = self.op.node_names
4008
    else:
4009
      lock_names = locking.ALL_SET
4010

    
4011
    self.needed_locks = {
4012
      locking.LEVEL_NODE: lock_names,
4013
      }
4014

    
4015
  def CheckPrereq(self):
4016
    """Check prerequisites.
4017

4018
    This checks:
4019
     - the node exists in the configuration
4020
     - OOB is supported
4021

4022
    Any errors are signaled by raising errors.OpPrereqError.
4023

4024
    """
4025
    self.nodes = []
4026
    self.master_node = self.cfg.GetMasterNode()
4027

    
4028
    assert self.op.power_delay >= 0.0
4029

    
4030
    if self.op.node_names:
4031
      if (self.op.command in self._SKIP_MASTER and
4032
          self.master_node in self.op.node_names):
4033
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4034
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4035

    
4036
        if master_oob_handler:
4037
          additional_text = ("run '%s %s %s' if you want to operate on the"
4038
                             " master regardless") % (master_oob_handler,
4039
                                                      self.op.command,
4040
                                                      self.master_node)
4041
        else:
4042
          additional_text = "it does not support out-of-band operations"
4043

    
4044
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4045
                                    " allowed for %s; %s") %
4046
                                   (self.master_node, self.op.command,
4047
                                    additional_text), errors.ECODE_INVAL)
4048
    else:
4049
      self.op.node_names = self.cfg.GetNodeList()
4050
      if self.op.command in self._SKIP_MASTER:
4051
        self.op.node_names.remove(self.master_node)
4052

    
4053
    if self.op.command in self._SKIP_MASTER:
4054
      assert self.master_node not in self.op.node_names
4055

    
4056
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4057
      if node is None:
4058
        raise errors.OpPrereqError("Node %s not found" % node_name,
4059
                                   errors.ECODE_NOENT)
4060
      else:
4061
        self.nodes.append(node)
4062

    
4063
      if (not self.op.ignore_status and
4064
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4065
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4066
                                    " not marked offline") % node_name,
4067
                                   errors.ECODE_STATE)
4068

    
4069
  def Exec(self, feedback_fn):
4070
    """Execute OOB and return result if we expect any.
4071

4072
    """
4073
    master_node = self.master_node
4074
    ret = []
4075

    
4076
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4077
                                              key=lambda node: node.name)):
4078
      node_entry = [(constants.RS_NORMAL, node.name)]
4079
      ret.append(node_entry)
4080

    
4081
      oob_program = _SupportsOob(self.cfg, node)
4082

    
4083
      if not oob_program:
4084
        node_entry.append((constants.RS_UNAVAIL, None))
4085
        continue
4086

    
4087
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4088
                   self.op.command, oob_program, node.name)
4089
      result = self.rpc.call_run_oob(master_node, oob_program,
4090
                                     self.op.command, node.name,
4091
                                     self.op.timeout)
4092

    
4093
      if result.fail_msg:
4094
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4095
                        node.name, result.fail_msg)
4096
        node_entry.append((constants.RS_NODATA, None))
4097
      else:
4098
        try:
4099
          self._CheckPayload(result)
4100
        except errors.OpExecError, err:
4101
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4102
                          node.name, err)
4103
          node_entry.append((constants.RS_NODATA, None))
4104
        else:
4105
          if self.op.command == constants.OOB_HEALTH:
4106
            # For health we should log important events
4107
            for item, status in result.payload:
4108
              if status in [constants.OOB_STATUS_WARNING,
4109
                            constants.OOB_STATUS_CRITICAL]:
4110
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4111
                                item, node.name, status)
4112

    
4113
          if self.op.command == constants.OOB_POWER_ON:
4114
            node.powered = True
4115
          elif self.op.command == constants.OOB_POWER_OFF:
4116
            node.powered = False
4117
          elif self.op.command == constants.OOB_POWER_STATUS:
4118
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4119
            if powered != node.powered:
4120
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4121
                               " match actual power state (%s)"), node.powered,
4122
                              node.name, powered)
4123

    
4124
          # For configuration changing commands we should update the node
4125
          if self.op.command in (constants.OOB_POWER_ON,
4126
                                 constants.OOB_POWER_OFF):
4127
            self.cfg.Update(node, feedback_fn)
4128

    
4129
          node_entry.append((constants.RS_NORMAL, result.payload))
4130

    
4131
          if (self.op.command == constants.OOB_POWER_ON and
4132
              idx < len(self.nodes) - 1):
4133
            time.sleep(self.op.power_delay)
4134

    
4135
    return ret
4136

    
4137
  def _CheckPayload(self, result):
4138
    """Checks if the payload is valid.
4139

4140
    @param result: RPC result
4141
    @raises errors.OpExecError: If payload is not valid
4142

4143
    """
4144
    errs = []
4145
    if self.op.command == constants.OOB_HEALTH:
4146
      if not isinstance(result.payload, list):
4147
        errs.append("command 'health' is expected to return a list but got %s" %
4148
                    type(result.payload))
4149
      else:
4150
        for item, status in result.payload:
4151
          if status not in constants.OOB_STATUSES:
4152
            errs.append("health item '%s' has invalid status '%s'" %
4153
                        (item, status))
4154

    
4155
    if self.op.command == constants.OOB_POWER_STATUS:
4156
      if not isinstance(result.payload, dict):
4157
        errs.append("power-status is expected to return a dict but got %s" %
4158
                    type(result.payload))
4159

    
4160
    if self.op.command in [
4161
        constants.OOB_POWER_ON,
4162
        constants.OOB_POWER_OFF,
4163
        constants.OOB_POWER_CYCLE,
4164
        ]:
4165
      if result.payload is not None:
4166
        errs.append("%s is expected to not return payload but got '%s'" %
4167
                    (self.op.command, result.payload))
4168

    
4169
    if errs:
4170
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4171
                               utils.CommaJoin(errs))
4172

    
4173

    
4174
class _OsQuery(_QueryBase):
4175
  FIELDS = query.OS_FIELDS
4176

    
4177
  def ExpandNames(self, lu):
4178
    # Lock all nodes in shared mode
4179
    # Temporary removal of locks, should be reverted later
4180
    # TODO: reintroduce locks when they are lighter-weight
4181
    lu.needed_locks = {}
4182
    #self.share_locks[locking.LEVEL_NODE] = 1
4183
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4184

    
4185
    # The following variables interact with _QueryBase._GetNames
4186
    if self.names:
4187
      self.wanted = self.names
4188
    else:
4189
      self.wanted = locking.ALL_SET
4190

    
4191
    self.do_locking = self.use_locking
4192

    
4193
  def DeclareLocks(self, lu, level):
4194
    pass
4195

    
4196
  @staticmethod
4197
  def _DiagnoseByOS(rlist):
4198
    """Remaps a per-node return list into an a per-os per-node dictionary
4199

4200
    @param rlist: a map with node names as keys and OS objects as values
4201

4202
    @rtype: dict
4203
    @return: a dictionary with osnames as keys and as value another
4204
        map, with nodes as keys and tuples of (path, status, diagnose,
4205
        variants, parameters, api_versions) as values, eg::
4206

4207
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4208
                                     (/srv/..., False, "invalid api")],
4209
                           "node2": [(/srv/..., True, "", [], [])]}
4210
          }
4211

4212
    """
4213
    all_os = {}
4214
    # we build here the list of nodes that didn't fail the RPC (at RPC
4215
    # level), so that nodes with a non-responding node daemon don't
4216
    # make all OSes invalid
4217
    good_nodes = [node_name for node_name in rlist
4218
                  if not rlist[node_name].fail_msg]
4219
    for node_name, nr in rlist.items():
4220
      if nr.fail_msg or not nr.payload:
4221
        continue
4222
      for (name, path, status, diagnose, variants,
4223
           params, api_versions) in nr.payload:
4224
        if name not in all_os:
4225
          # build a list of nodes for this os containing empty lists
4226
          # for each node in good_nodes
4227
          all_os[name] = {}
4228
          for nname in good_nodes:
4229
            all_os[name][nname] = []
4230
        # convert params from [name, help] to (name, help)
4231
        params = [tuple(v) for v in params]
4232
        all_os[name][node_name].append((path, status, diagnose,
4233
                                        variants, params, api_versions))
4234
    return all_os
4235

    
4236
  def _GetQueryData(self, lu):
4237
    """Computes the list of nodes and their attributes.
4238

4239
    """
4240
    # Locking is not used
4241
    assert not (compat.any(lu.glm.is_owned(level)
4242
                           for level in locking.LEVELS
4243
                           if level != locking.LEVEL_CLUSTER) or
4244
                self.do_locking or self.use_locking)
4245

    
4246
    valid_nodes = [node.name
4247
                   for node in lu.cfg.GetAllNodesInfo().values()
4248
                   if not node.offline and node.vm_capable]
4249
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4250
    cluster = lu.cfg.GetClusterInfo()
4251

    
4252
    data = {}
4253

    
4254
    for (os_name, os_data) in pol.items():
4255
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4256
                          hidden=(os_name in cluster.hidden_os),
4257
                          blacklisted=(os_name in cluster.blacklisted_os))
4258

    
4259
      variants = set()
4260
      parameters = set()
4261
      api_versions = set()
4262

    
4263
      for idx, osl in enumerate(os_data.values()):
4264
        info.valid = bool(info.valid and osl and osl[0][1])
4265
        if not info.valid:
4266
          break
4267

    
4268
        (node_variants, node_params, node_api) = osl[0][3:6]
4269
        if idx == 0:
4270
          # First entry
4271
          variants.update(node_variants)
4272
          parameters.update(node_params)
4273
          api_versions.update(node_api)
4274
        else:
4275
          # Filter out inconsistent values
4276
          variants.intersection_update(node_variants)
4277
          parameters.intersection_update(node_params)
4278
          api_versions.intersection_update(node_api)
4279

    
4280
      info.variants = list(variants)
4281
      info.parameters = list(parameters)
4282
      info.api_versions = list(api_versions)
4283

    
4284
      data[os_name] = info
4285

    
4286
    # Prepare data in requested order
4287
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4288
            if name in data]
4289

    
4290

    
4291
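# Illustrative sketch only: the per-OS/per-node remapping that
# _OsQuery._DiagnoseByOS performs, shown on plain dictionaries instead of
# RPC result objects so the data shapes are easy to see.  The helper name
# and the simplified tuples are hypothetical.
def _ExampleRemapOsList(node_oses):
  """Turns {node: [(name, path, status), ...]} into {os: {node: [...]}}.

  Nodes that report nothing simply do not appear under an OS, which is
  how a single unreachable node daemon is kept from invalidating every
  OS in the real code above.

  """
  result = {}
  for node_name, os_list in node_oses.items():
    for (name, path, status) in os_list:
      result.setdefault(name, {}).setdefault(node_name, []).append(
        (path, status))
  return result

# Example:
#   _ExampleRemapOsList({"node1": [("debian-etch", "/usr/lib/os", True)]})
#   -> {"debian-etch": {"node1": [("/usr/lib/os", True)]}}

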
class LUOsDiagnose(NoHooksLU):
4292
  """Logical unit for OS diagnose/query.
4293

4294
  """
4295
  REQ_BGL = False
4296

    
4297
  @staticmethod
4298
  def _BuildFilter(fields, names):
4299
    """Builds a filter for querying OSes.
4300

4301
    """
4302
    name_filter = qlang.MakeSimpleFilter("name", names)
4303

    
4304
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4305
    # respective field is not requested
4306
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4307
                     for fname in ["hidden", "blacklisted"]
4308
                     if fname not in fields]
4309
    if "valid" not in fields:
4310
      status_filter.append([qlang.OP_TRUE, "valid"])
4311

    
4312
    if status_filter:
4313
      status_filter.insert(0, qlang.OP_AND)
4314
    else:
4315
      status_filter = None
4316

    
4317
    if name_filter and status_filter:
4318
      return [qlang.OP_AND, name_filter, status_filter]
4319
    elif name_filter:
4320
      return name_filter
4321
    else:
4322
      return status_filter
4323

    
4324
  def CheckArguments(self):
4325
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4326
                       self.op.output_fields, False)
4327

    
4328
  def ExpandNames(self):
4329
    self.oq.ExpandNames(self)
4330

    
4331
  def Exec(self, feedback_fn):
4332
    return self.oq.OldStyleQuery(self)
4333

    
4334

    
4335
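# Illustrative sketch only: a concrete invocation of the filter builder
# above.  The OS name and field list are made-up example values.
def _ExampleOsDiagnoseFilter():
  """Returns the query filter for a single OS queried by name.

  Since neither "hidden", "blacklisted" nor "valid" is among the
  requested fields, _BuildFilter ANDs the name filter with the legacy
  status filter that hides hidden/blacklisted OSes and keeps only valid
  ones.

  """
  # pylint: disable=W0212
  return LUOsDiagnose._BuildFilter(["name", "variants"], ["debian-etch"])

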
class LUNodeRemove(LogicalUnit):
4336
  """Logical unit for removing a node.
4337

4338
  """
4339
  HPATH = "node-remove"
4340
  HTYPE = constants.HTYPE_NODE
4341

    
4342
  def BuildHooksEnv(self):
4343
    """Build hooks env.
4344

4345
    This doesn't run on the target node in the pre phase as a failed
4346
    node would then be impossible to remove.
4347

4348
    """
4349
    return {
4350
      "OP_TARGET": self.op.node_name,
4351
      "NODE_NAME": self.op.node_name,
4352
      }
4353

    
4354
  def BuildHooksNodes(self):
4355
    """Build hooks nodes.
4356

4357
    """
4358
    all_nodes = self.cfg.GetNodeList()
4359
    try:
4360
      all_nodes.remove(self.op.node_name)
4361
    except ValueError:
4362
      logging.warning("Node '%s', which is about to be removed, was not found"
4363
                      " in the list of all nodes", self.op.node_name)
4364
    return (all_nodes, all_nodes)
4365

    
4366
  def CheckPrereq(self):
4367
    """Check prerequisites.
4368

4369
    This checks:
4370
     - the node exists in the configuration
4371
     - it does not have primary or secondary instances
4372
     - it's not the master
4373

4374
    Any errors are signaled by raising errors.OpPrereqError.
4375

4376
    """
4377
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4378
    node = self.cfg.GetNodeInfo(self.op.node_name)
4379
    assert node is not None
4380

    
4381
    masternode = self.cfg.GetMasterNode()
4382
    if node.name == masternode:
4383
      raise errors.OpPrereqError("Node is the master node, failover to another"
4384
                                 " node is required", errors.ECODE_INVAL)
4385

    
4386
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4387
      if node.name in instance.all_nodes:
4388
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4389
                                   " please remove first" % instance_name,
4390
                                   errors.ECODE_INVAL)
4391
    self.op.node_name = node.name
4392
    self.node = node
4393

    
4394
  def Exec(self, feedback_fn):
4395
    """Removes the node from the cluster.
4396

4397
    """
4398
    node = self.node
4399
    logging.info("Stopping the node daemon and removing configs from node %s",
4400
                 node.name)
4401

    
4402
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4403

    
4404
    # Promote nodes to master candidate as needed
4405
    _AdjustCandidatePool(self, exceptions=[node.name])
4406
    self.context.RemoveNode(node.name)
4407

    
4408
    # Run post hooks on the node before it's removed
4409
    _RunPostHook(self, node.name)
4410

    
4411
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4412
    msg = result.fail_msg
4413
    if msg:
4414
      self.LogWarning("Errors encountered on the remote node while leaving"
4415
                      " the cluster: %s", msg)
4416

    
4417
    # Remove node from our /etc/hosts
4418
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4419
      master_node = self.cfg.GetMasterNode()
4420
      result = self.rpc.call_etc_hosts_modify(master_node,
4421
                                              constants.ETC_HOSTS_REMOVE,
4422
                                              node.name, None)
4423
      result.Raise("Can't update hosts file with new host data")
4424
      _RedistributeAncillaryFiles(self)
4425

    
4426

    
4427
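# Illustrative sketch only: the two constraints LUNodeRemove.CheckPrereq
# enforces, expressed over plain data (node name, master name, and a map
# of instance name to the list of nodes it uses).  Hypothetical helper,
# not called anywhere in this module.
def _ExampleNodeRemovalBlockers(node_name, master_name, instance_nodes):
  """Returns the reasons why a node cannot be removed, if any.

  """
  blockers = []
  if node_name == master_name:
    blockers.append("node is the master, failover first")
  blockers.extend("instance %s still uses the node" % inst
                  for (inst, nodes) in instance_nodes.items()
                  if node_name in nodes)
  return blockers

# Example:
#   _ExampleNodeRemovalBlockers("node3", "node1", {"inst1": ["node1", "node3"]})
#   -> ["instance inst1 still uses the node"]

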
class _NodeQuery(_QueryBase):
4428
  FIELDS = query.NODE_FIELDS
4429

    
4430
  def ExpandNames(self, lu):
4431
    lu.needed_locks = {}
4432
    lu.share_locks = _ShareAll()
4433

    
4434
    if self.names:
4435
      self.wanted = _GetWantedNodes(lu, self.names)
4436
    else:
4437
      self.wanted = locking.ALL_SET
4438

    
4439
    self.do_locking = (self.use_locking and
4440
                       query.NQ_LIVE in self.requested_data)
4441

    
4442
    if self.do_locking:
4443
      # If any non-static field is requested we need to lock the nodes
4444
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4445

    
4446
  def DeclareLocks(self, lu, level):
4447
    pass
4448

    
4449
  def _GetQueryData(self, lu):
4450
    """Computes the list of nodes and their attributes.
4451

4452
    """
4453
    all_info = lu.cfg.GetAllNodesInfo()
4454

    
4455
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4456

    
4457
    # Gather data as requested
4458
    if query.NQ_LIVE in self.requested_data:
4459
      # filter out non-vm_capable nodes
4460
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4461

    
4462
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4463
                                        lu.cfg.GetHypervisorType())
4464
      live_data = dict((name, nresult.payload)
4465
                       for (name, nresult) in node_data.items()
4466
                       if not nresult.fail_msg and nresult.payload)
4467
    else:
4468
      live_data = None
4469

    
4470
    if query.NQ_INST in self.requested_data:
4471
      node_to_primary = dict([(name, set()) for name in nodenames])
4472
      node_to_secondary = dict([(name, set()) for name in nodenames])
4473

    
4474
      inst_data = lu.cfg.GetAllInstancesInfo()
4475

    
4476
      for inst in inst_data.values():
4477
        if inst.primary_node in node_to_primary:
4478
          node_to_primary[inst.primary_node].add(inst.name)
4479
        for secnode in inst.secondary_nodes:
4480
          if secnode in node_to_secondary:
4481
            node_to_secondary[secnode].add(inst.name)
4482
    else:
4483
      node_to_primary = None
4484
      node_to_secondary = None
4485

    
4486
    if query.NQ_OOB in self.requested_data:
4487
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4488
                         for name, node in all_info.iteritems())
4489
    else:
4490
      oob_support = None
4491

    
4492
    if query.NQ_GROUP in self.requested_data:
4493
      groups = lu.cfg.GetAllNodeGroupsInfo()
4494
    else:
4495
      groups = {}
4496

    
4497
    return query.NodeQueryData([all_info[name] for name in nodenames],
4498
                               live_data, lu.cfg.GetMasterNode(),
4499
                               node_to_primary, node_to_secondary, groups,
4500
                               oob_support, lu.cfg.GetClusterInfo())
4501

    
4502

    
4503
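# Illustrative sketch only: how the node_to_primary/node_to_secondary maps
# built by _NodeQuery._GetQueryData relate instances to nodes.  The
# instances argument is a list of (name, primary, secondaries) tuples
# rather than objects.Instance, to keep the example standalone; the helper
# itself is hypothetical.
def _ExampleNodeInstanceMaps(nodenames, instances):
  """Returns ({node: set(primary insts)}, {node: set(secondary insts)}).

  """
  node_to_primary = dict((name, set()) for name in nodenames)
  node_to_secondary = dict((name, set()) for name in nodenames)
  for (inst_name, primary, secondaries) in instances:
    if primary in node_to_primary:
      node_to_primary[primary].add(inst_name)
    for secnode in secondaries:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst_name)
  return (node_to_primary, node_to_secondary)

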
class LUNodeQuery(NoHooksLU):
4504
  """Logical unit for querying nodes.
4505

4506
  """
4507
  # pylint: disable=W0142
4508
  REQ_BGL = False
4509

    
4510
  def CheckArguments(self):
4511
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4512
                         self.op.output_fields, self.op.use_locking)
4513

    
4514
  def ExpandNames(self):
4515
    self.nq.ExpandNames(self)
4516

    
4517
  def Exec(self, feedback_fn):
4518
    return self.nq.OldStyleQuery(self)
4519

    
4520

    
4521
class LUNodeQueryvols(NoHooksLU):
4522
  """Logical unit for getting volumes on node(s).
4523

4524
  """
4525
  REQ_BGL = False
4526
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4527
  _FIELDS_STATIC = utils.FieldSet("node")
4528

    
4529
  def CheckArguments(self):
4530
    _CheckOutputFields(static=self._FIELDS_STATIC,
4531
                       dynamic=self._FIELDS_DYNAMIC,
4532
                       selected=self.op.output_fields)
4533

    
4534
  def ExpandNames(self):
4535
    self.needed_locks = {}
4536
    self.share_locks[locking.LEVEL_NODE] = 1
4537
    if not self.op.nodes:
4538
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4539
    else:
4540
      self.needed_locks[locking.LEVEL_NODE] = \
4541
        _GetWantedNodes(self, self.op.nodes)
4542

    
4543
  def Exec(self, feedback_fn):
4544
    """Computes the list of nodes and their attributes.
4545

4546
    """
4547
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4548
    volumes = self.rpc.call_node_volumes(nodenames)
4549

    
4550
    ilist = self.cfg.GetAllInstancesInfo()
4551
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4552

    
4553
    output = []
4554
    for node in nodenames:
4555
      nresult = volumes[node]
4556
      if nresult.offline:
4557
        continue
4558
      msg = nresult.fail_msg
4559
      if msg:
4560
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4561
        continue
4562

    
4563
      node_vols = sorted(nresult.payload,
4564
                         key=operator.itemgetter("dev"))
4565

    
4566
      for vol in node_vols:
4567
        node_output = []
4568
        for field in self.op.output_fields:
4569
          if field == "node":
4570
            val = node
4571
          elif field == "phys":
4572
            val = vol["dev"]
4573
          elif field == "vg":
4574
            val = vol["vg"]
4575
          elif field == "name":
4576
            val = vol["name"]
4577
          elif field == "size":
4578
            val = int(float(vol["size"]))
4579
          elif field == "instance":
4580
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4581
          else:
4582
            raise errors.ParameterError(field)
4583
          node_output.append(str(val))
4584

    
4585
        output.append(node_output)
4586

    
4587
    return output
4588

    
4589

    
4590
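# Illustrative sketch only: the key convention used by the volume/instance
# lookup above.  A volume is identified by (node, "<vg>/<lv name>"), which
# is the key shape assumed for the vol2inst mapping; the helper name is
# hypothetical.
def _ExampleVolumeInstanceKey(node, vol):
  """Returns the lookup key for a volume dictionary from the node RPC.

  """
  return (node, vol["vg"] + "/" + vol["name"])

# Example: a volume {"vg": "xenvg", "name": "disk0", ...} on "node1" maps
# to the key ("node1", "xenvg/disk0").

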
class LUNodeQueryStorage(NoHooksLU):
4591
  """Logical unit for getting information on storage units on node(s).
4592

4593
  """
4594
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4595
  REQ_BGL = False
4596

    
4597
  def CheckArguments(self):
4598
    _CheckOutputFields(static=self._FIELDS_STATIC,
4599
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4600
                       selected=self.op.output_fields)
4601

    
4602
  def ExpandNames(self):
4603
    self.needed_locks = {}
4604
    self.share_locks[locking.LEVEL_NODE] = 1
4605

    
4606
    if self.op.nodes:
4607
      self.needed_locks[locking.LEVEL_NODE] = \
4608
        _GetWantedNodes(self, self.op.nodes)
4609
    else:
4610
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4611

    
4612
  def Exec(self, feedback_fn):
4613
    """Computes the list of nodes and their attributes.
4614

4615
    """
4616
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4617

    
4618
    # Always get name to sort by
4619
    if constants.SF_NAME in self.op.output_fields:
4620
      fields = self.op.output_fields[:]
4621
    else:
4622
      fields = [constants.SF_NAME] + self.op.output_fields
4623

    
4624
    # Never ask for node or type as it's only known to the LU
4625
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4626
      while extra in fields:
4627
        fields.remove(extra)
4628

    
4629
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4630
    name_idx = field_idx[constants.SF_NAME]
4631

    
4632
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4633
    data = self.rpc.call_storage_list(self.nodes,
4634
                                      self.op.storage_type, st_args,
4635
                                      self.op.name, fields)
4636

    
4637
    result = []
4638

    
4639
    for node in utils.NiceSort(self.nodes):
4640
      nresult = data[node]
4641
      if nresult.offline:
4642
        continue
4643

    
4644
      msg = nresult.fail_msg
4645
      if msg:
4646
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4647
        continue
4648

    
4649
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4650

    
4651
      for name in utils.NiceSort(rows.keys()):
4652
        row = rows[name]
4653

    
4654
        out = []
4655

    
4656
        for field in self.op.output_fields:
4657
          if field == constants.SF_NODE:
4658
            val = node
4659
          elif field == constants.SF_TYPE:
4660
            val = self.op.storage_type
4661
          elif field in field_idx:
4662
            val = row[field_idx[field]]
4663
          else:
4664
            raise errors.ParameterError(field)
4665

    
4666
          out.append(val)
4667

    
4668
        result.append(out)
4669

    
4670
    return result
4671

    
4672

    
4673
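# Illustrative sketch only: the field juggling done by
# LUNodeQueryStorage.Exec before calling the storage RPC - the name field
# is always fetched (rows are sorted by it) while the node and type fields
# are stripped, since only the LU itself knows them.  Hypothetical helper
# operating on plain field lists.
def _ExampleStorageRpcFields(output_fields):
  """Returns the field list to request from the storage RPC.

  """
  if constants.SF_NAME in output_fields:
    fields = output_fields[:]
  else:
    fields = [constants.SF_NAME] + output_fields
  return [f for f in fields
          if f not in (constants.SF_NODE, constants.SF_TYPE)]

# Example: _ExampleStorageRpcFields([constants.SF_NODE, constants.SF_FREE])
# returns [constants.SF_NAME, constants.SF_FREE].

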
class _InstanceQuery(_QueryBase):
4674
  FIELDS = query.INSTANCE_FIELDS
4675

    
4676
  def ExpandNames(self, lu):
4677
    lu.needed_locks = {}
4678
    lu.share_locks = _ShareAll()
4679

    
4680
    if self.names:
4681
      self.wanted = _GetWantedInstances(lu, self.names)
4682
    else:
4683
      self.wanted = locking.ALL_SET
4684

    
4685
    self.do_locking = (self.use_locking and
4686
                       query.IQ_LIVE in self.requested_data)
4687
    if self.do_locking:
4688
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4689
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4690
      lu.needed_locks[locking.LEVEL_NODE] = []
4691
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4692

    
4693
    self.do_grouplocks = (self.do_locking and
4694
                          query.IQ_NODES in self.requested_data)
4695

    
4696
  def DeclareLocks(self, lu, level):
4697
    if self.do_locking:
4698
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4699
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4700

    
4701
        # Lock all groups used by instances optimistically; this requires going
4702
        # via the node before it's locked, requiring verification later on
4703
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4704
          set(group_uuid
4705
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4706
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4707
      elif level == locking.LEVEL_NODE:
4708
        lu._LockInstancesNodes() # pylint: disable=W0212
4709

    
4710
  @staticmethod
4711
  def _CheckGroupLocks(lu):
4712
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4713
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4714

    
4715
    # Check if node groups for locked instances are still correct
4716
    for instance_name in owned_instances:
4717
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4718

    
4719
  def _GetQueryData(self, lu):
4720
    """Computes the list of instances and their attributes.
4721

4722
    """
4723
    if self.do_grouplocks:
4724
      self._CheckGroupLocks(lu)
4725

    
4726
    cluster = lu.cfg.GetClusterInfo()
4727
    all_info = lu.cfg.GetAllInstancesInfo()
4728

    
4729
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4730

    
4731
    instance_list = [all_info[name] for name in instance_names]
4732
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4733
                                        for inst in instance_list)))
4734
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4735
    bad_nodes = []
4736
    offline_nodes = []
4737
    wrongnode_inst = set()
4738

    
4739
    # Gather data as requested
4740
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4741
      live_data = {}
4742
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4743
      for name in nodes:
4744
        result = node_data[name]
4745
        if result.offline:
4746
          # offline nodes will be in both lists
4747
          assert result.fail_msg
4748
          offline_nodes.append(name)
4749
        if result.fail_msg:
4750
          bad_nodes.append(name)
4751
        elif result.payload:
4752
          for inst in result.payload:
4753
            if inst in all_info:
4754
              if all_info[inst].primary_node == name:
4755
                live_data.update(result.payload)
4756
              else:
4757
                wrongnode_inst.add(inst)
4758
            else:
4759
              # orphan instance; we don't list it here as we don't
4760
              # handle this case yet in the output of instance listing
4761
              logging.warning("Orphan instance '%s' found on node %s",
4762
                              inst, name)
4763
        # else no instance is alive
4764
    else:
4765
      live_data = {}
4766

    
4767
    if query.IQ_DISKUSAGE in self.requested_data:
4768
      disk_usage = dict((inst.name,
4769
                         _ComputeDiskSize(inst.disk_template,
4770
                                          [{constants.IDISK_SIZE: disk.size}
4771
                                           for disk in inst.disks]))
4772
                        for inst in instance_list)
4773
    else:
4774
      disk_usage = None
4775

    
4776
    if query.IQ_CONSOLE in self.requested_data:
4777
      consinfo = {}
4778
      for inst in instance_list:
4779
        if inst.name in live_data:
4780
          # Instance is running
4781
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4782
        else:
4783
          consinfo[inst.name] = None
4784
      assert set(consinfo.keys()) == set(instance_names)
4785
    else:
4786
      consinfo = None
4787

    
4788
    if query.IQ_NODES in self.requested_data:
4789
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4790
                                            instance_list)))
4791
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4792
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4793
                    for uuid in set(map(operator.attrgetter("group"),
4794
                                        nodes.values())))
4795
    else:
4796
      nodes = None
4797
      groups = None
4798

    
4799
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4800
                                   disk_usage, offline_nodes, bad_nodes,
4801
                                   live_data, wrongnode_inst, consinfo,
4802
                                   nodes, groups)
4803

    
4804

    
4805
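# Illustrative sketch only: the node/instance classification performed
# while gathering live data in _InstanceQuery._GetQueryData, restated over
# plain data.  node_results maps node name to a hypothetical
# (offline, failed, running_instances) tuple; primaries maps instance name
# to its configured primary node.  Orphan-instance handling is omitted.
def _ExampleClassifyLiveData(node_results, primaries):
  """Returns (offline_nodes, bad_nodes, wrongnode_instances).

  Offline nodes also count as bad; an instance reported by a node that is
  not its configured primary is flagged as being on the wrong node.

  """
  offline_nodes = []
  bad_nodes = []
  wrongnode_inst = set()
  for (name, (offline, failed, running)) in node_results.items():
    if offline:
      offline_nodes.append(name)
    if failed:
      bad_nodes.append(name)
    else:
      wrongnode_inst.update(inst for inst in running
                            if inst in primaries and primaries[inst] != name)
  return (offline_nodes, bad_nodes, wrongnode_inst)

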
class LUQuery(NoHooksLU):
4806
  """Query for resources/items of a certain kind.
4807

4808
  """
4809
  # pylint: disable=W0142
4810
  REQ_BGL = False
4811

    
4812
  def CheckArguments(self):
4813
    qcls = _GetQueryImplementation(self.op.what)
4814

    
4815
    self.impl = qcls(self.op.filter, self.op.fields, self.op.use_locking)
4816

    
4817
  def ExpandNames(self):
4818
    self.impl.ExpandNames(self)
4819

    
4820
  def DeclareLocks(self, level):
4821
    self.impl.DeclareLocks(self, level)
4822

    
4823
  def Exec(self, feedback_fn):
4824
    return self.impl.NewStyleQuery(self)
4825

    
4826

    
4827
class LUQueryFields(NoHooksLU):
4828
  """Query for resources/items of a certain kind.
4829

4830
  """
4831
  # pylint: disable=W0142
4832
  REQ_BGL = False
4833

    
4834
  def CheckArguments(self):
4835
    self.qcls = _GetQueryImplementation(self.op.what)
4836

    
4837
  def ExpandNames(self):
4838
    self.needed_locks = {}
4839

    
4840
  def Exec(self, feedback_fn):
4841
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4842

    
4843

    
4844
class LUNodeModifyStorage(NoHooksLU):
4845
  """Logical unit for modifying a storage volume on a node.
4846

4847
  """
4848
  REQ_BGL = False
4849

    
4850
  def CheckArguments(self):
4851
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4852

    
4853
    storage_type = self.op.storage_type
4854

    
4855
    try:
4856
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4857
    except KeyError:
4858
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4859
                                 " modified" % storage_type,
4860
                                 errors.ECODE_INVAL)
4861

    
4862
    diff = set(self.op.changes.keys()) - modifiable
4863
    if diff:
4864
      raise errors.OpPrereqError("The following fields can not be modified for"
4865
                                 " storage units of type '%s': %r" %
4866
                                 (storage_type, list(diff)),
4867
                                 errors.ECODE_INVAL)
4868

    
4869
  def ExpandNames(self):
4870
    self.needed_locks = {
4871
      locking.LEVEL_NODE: self.op.node_name,
4872
      }
4873

    
4874
  def Exec(self, feedback_fn):
4875
    """Computes the list of nodes and their attributes.
4876

4877
    """
4878
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4879
    result = self.rpc.call_storage_modify(self.op.node_name,
4880
                                          self.op.storage_type, st_args,
4881
                                          self.op.name, self.op.changes)
4882
    result.Raise("Failed to modify storage unit '%s' on %s" %
4883
                 (self.op.name, self.op.node_name))
4884

    
4885

    
4886
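# Illustrative sketch only: the argument check in
# LUNodeModifyStorage.CheckArguments boils down to a set difference between
# the requested changes and the per-type whitelist in
# constants.MODIFIABLE_STORAGE_FIELDS.  Hypothetical helper.
def _ExampleUnmodifiableStorageFields(storage_type, changes):
  """Returns the requested fields that may not be changed.

  Raises OpPrereqError for storage types that cannot be modified at all.

  """
  try:
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
  except KeyError:
    raise errors.OpPrereqError("Storage units of type '%s' can not be"
                               " modified" % storage_type, errors.ECODE_INVAL)
  return set(changes) - modifiable

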
class LUNodeAdd(LogicalUnit):
4887
  """Logical unit for adding node to the cluster.
4888

4889
  """
4890
  HPATH = "node-add"
4891
  HTYPE = constants.HTYPE_NODE
4892
  _NFLAGS = ["master_capable", "vm_capable"]
4893

    
4894
  def CheckArguments(self):
4895
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4896
    # validate/normalize the node name
4897
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4898
                                         family=self.primary_ip_family)
4899
    self.op.node_name = self.hostname.name
4900

    
4901
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4902
      raise errors.OpPrereqError("Cannot readd the master node",
4903
                                 errors.ECODE_STATE)
4904

    
4905
    if self.op.readd and self.op.group:
4906
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4907
                                 " being readded", errors.ECODE_INVAL)
4908

    
4909
  def BuildHooksEnv(self):
4910
    """Build hooks env.
4911

4912
    This will run on all nodes before, and on all nodes + the new node after.
4913

4914
    """
4915
    return {
4916
      "OP_TARGET": self.op.node_name,
4917
      "NODE_NAME": self.op.node_name,
4918
      "NODE_PIP": self.op.primary_ip,
4919
      "NODE_SIP": self.op.secondary_ip,
4920
      "MASTER_CAPABLE": str(self.op.master_capable),
4921
      "VM_CAPABLE": str(self.op.vm_capable),
4922
      }
4923

    
4924
  def BuildHooksNodes(self):
4925
    """Build hooks nodes.
4926

4927
    """
4928
    # Exclude added node
4929
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4930
    post_nodes = pre_nodes + [self.op.node_name, ]
4931

    
4932
    return (pre_nodes, post_nodes)
4933

    
4934
  def CheckPrereq(self):
4935
    """Check prerequisites.
4936

4937
    This checks:
4938
     - the new node is not already in the config
4939
     - it is resolvable
4940
     - its parameters (single/dual homed) matches the cluster
4941

4942
    Any errors are signaled by raising errors.OpPrereqError.
4943

4944
    """
4945
    cfg = self.cfg
4946
    hostname = self.hostname
4947
    node = hostname.name
4948
    primary_ip = self.op.primary_ip = hostname.ip
4949
    if self.op.secondary_ip is None:
4950
      if self.primary_ip_family == netutils.IP6Address.family:
4951
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4952
                                   " IPv4 address must be given as secondary",
4953
                                   errors.ECODE_INVAL)
4954
      self.op.secondary_ip = primary_ip
4955

    
4956
    secondary_ip = self.op.secondary_ip
4957
    if not netutils.IP4Address.IsValid(secondary_ip):
4958
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4959
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4960

    
4961
    node_list = cfg.GetNodeList()
4962
    if not self.op.readd and node in node_list:
4963
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4964
                                 node, errors.ECODE_EXISTS)
4965
    elif self.op.readd and node not in node_list:
4966
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4967
                                 errors.ECODE_NOENT)
4968

    
4969
    self.changed_primary_ip = False
4970

    
4971
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4972
      if self.op.readd and node == existing_node_name:
4973
        if existing_node.secondary_ip != secondary_ip:
4974
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4975
                                     " address configuration as before",
4976
                                     errors.ECODE_INVAL)
4977
        if existing_node.primary_ip != primary_ip:
4978
          self.changed_primary_ip = True
4979

    
4980
        continue
4981

    
4982
      if (existing_node.primary_ip == primary_ip or
4983
          existing_node.secondary_ip == primary_ip or
4984
          existing_node.primary_ip == secondary_ip or
4985
          existing_node.secondary_ip == secondary_ip):
4986
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4987
                                   " existing node %s" % existing_node.name,
4988
                                   errors.ECODE_NOTUNIQUE)
4989

    
4990
    # After this 'if' block, None is no longer a valid value for the
4991
    # _capable op attributes
4992
    if self.op.readd:
4993
      old_node = self.cfg.GetNodeInfo(node)
4994
      assert old_node is not None, "Can't retrieve locked node %s" % node
4995
      for attr in self._NFLAGS:
4996
        if getattr(self.op, attr) is None:
4997
          setattr(self.op, attr, getattr(old_node, attr))
4998
    else:
4999
      for attr in self._NFLAGS:
5000
        if getattr(self.op, attr) is None:
5001
          setattr(self.op, attr, True)
5002

    
5003
    if self.op.readd and not self.op.vm_capable:
5004
      pri, sec = cfg.GetNodeInstances(node)
5005
      if pri or sec:
5006
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5007
                                   " flag set to false, but it already holds"
5008
                                   " instances" % node,
5009
                                   errors.ECODE_STATE)
5010

    
5011
    # check that the type of the node (single versus dual homed) is the
5012
    # same as for the master
5013
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5014
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5015
    newbie_singlehomed = secondary_ip == primary_ip
5016
    if master_singlehomed != newbie_singlehomed:
5017
      if master_singlehomed:
5018
        raise errors.OpPrereqError("The master has no secondary ip but the"
5019
                                   " new node has one",
5020
                                   errors.ECODE_INVAL)
5021
      else:
5022
        raise errors.OpPrereqError("The master has a secondary ip but the"
5023
                                   " new node doesn't have one",
5024
                                   errors.ECODE_INVAL)
5025

    
5026
    # checks reachability
5027
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5028
      raise errors.OpPrereqError("Node not reachable by ping",
5029
                                 errors.ECODE_ENVIRON)
5030

    
5031
    if not newbie_singlehomed:
5032
      # check reachability from my secondary ip to newbie's secondary ip
5033
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5034
                           source=myself.secondary_ip):
5035
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5036
                                   " based ping to node daemon port",
5037
                                   errors.ECODE_ENVIRON)
5038

    
5039
    if self.op.readd:
5040
      exceptions = [node]
5041
    else:
5042
      exceptions = []
5043

    
5044
    if self.op.master_capable:
5045
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5046
    else:
5047
      self.master_candidate = False
5048

    
5049
    if self.op.readd:
5050
      self.new_node = old_node
5051
    else:
5052
      node_group = cfg.LookupNodeGroup(self.op.group)
5053
      self.new_node = objects.Node(name=node,
5054
                                   primary_ip=primary_ip,
5055
                                   secondary_ip=secondary_ip,
5056
                                   master_candidate=self.master_candidate,
5057
                                   offline=False, drained=False,
5058
                                   group=node_group)
5059

    
5060
    if self.op.ndparams:
5061
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5062

    
5063
  def Exec(self, feedback_fn):
5064
    """Adds the new node to the cluster.
5065

5066
    """
5067
    new_node = self.new_node
5068
    node = new_node.name
5069

    
5070
    # We are adding a new node, so we assume it's powered
5071
    new_node.powered = True
5072

    
5073
    # for re-adds, reset the offline/drained/master-candidate flags;
5074
    # we need to reset here, otherwise offline would prevent RPC calls
5075
    # later in the procedure; this also means that if the re-add
5076
    # fails, we are left with a non-offlined, broken node
5077
    if self.op.readd:
5078
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5079
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5080
      # if we demote the node, we do cleanup later in the procedure
5081
      new_node.master_candidate = self.master_candidate
5082
      if self.changed_primary_ip:
5083
        new_node.primary_ip = self.op.primary_ip
5084

    
5085
    # copy the master/vm_capable flags
5086
    for attr in self._NFLAGS:
5087
      setattr(new_node, attr, getattr(self.op, attr))
5088

    
5089
    # notify the user about any possible mc promotion
5090
    if new_node.master_candidate:
5091
      self.LogInfo("Node will be a master candidate")
5092

    
5093
    if self.op.ndparams:
5094
      new_node.ndparams = self.op.ndparams
5095
    else:
5096
      new_node.ndparams = {}
5097

    
5098
    # check connectivity
5099
    result = self.rpc.call_version([node])[node]
5100
    result.Raise("Can't get version information from node %s" % node)
5101
    if constants.PROTOCOL_VERSION == result.payload:
5102
      logging.info("Communication to node %s fine, sw version %s match",
5103
                   node, result.payload)
5104
    else:
5105
      raise errors.OpExecError("Version mismatch master version %s,"
5106
                               " node version %s" %
5107
                               (constants.PROTOCOL_VERSION, result.payload))
5108

    
5109
    # Add node to our /etc/hosts, and add key to known_hosts
5110
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5111
      master_node = self.cfg.GetMasterNode()
5112
      result = self.rpc.call_etc_hosts_modify(master_node,
5113
                                              constants.ETC_HOSTS_ADD,
5114
                                              self.hostname.name,
5115
                                              self.hostname.ip)
5116
      result.Raise("Can't update hosts file with new host data")
5117

    
5118
    if new_node.secondary_ip != new_node.primary_ip:
5119
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5120
                               False)
5121

    
5122
    node_verify_list = [self.cfg.GetMasterNode()]
5123
    node_verify_param = {
5124
      constants.NV_NODELIST: ([node], {}),
5125
      # TODO: do a node-net-test as well?
5126
    }
5127

    
5128
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5129
                                       self.cfg.GetClusterName())
5130
    for verifier in node_verify_list:
5131
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5132
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5133
      if nl_payload:
5134
        for failed in nl_payload:
5135
          feedback_fn("ssh/hostname verification failed"
5136
                      " (checking from %s): %s" %
5137
                      (verifier, nl_payload[failed]))
5138
        raise errors.OpExecError("ssh/hostname verification failed")
5139

    
5140
    if self.op.readd:
5141
      _RedistributeAncillaryFiles(self)
5142
      self.context.ReaddNode(new_node)
5143
      # make sure we redistribute the config
5144
      self.cfg.Update(new_node, feedback_fn)
5145
      # and make sure the new node will not have old files around
5146
      if not new_node.master_candidate:
5147
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5148
        msg = result.fail_msg
5149
        if msg:
5150
          self.LogWarning("Node failed to demote itself from master"
5151
                          " candidate status: %s" % msg)
5152
    else:
5153
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5154
                                  additional_vm=self.op.vm_capable)
5155
      self.context.AddNode(new_node, self.proc.GetECId())
5156

    
5157

    
5158
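# Illustrative sketch only: the single-homed versus dual-homed consistency
# rule from LUNodeAdd.CheckPrereq - a node may only join with the same
# "homing" as the master.  Hypothetical helper over plain IP strings.
def _ExampleHomingMismatch(master_primary, master_secondary,
                           new_primary, new_secondary):
  """Returns an error string on a homing mismatch, otherwise None.

  """
  master_singlehomed = master_secondary == master_primary
  newbie_singlehomed = new_secondary == new_primary
  if master_singlehomed == newbie_singlehomed:
    return None
  if master_singlehomed:
    return "the master has no secondary ip but the new node has one"
  return "the master has a secondary ip but the new node doesn't have one"

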
class LUNodeSetParams(LogicalUnit):
5159
  """Modifies the parameters of a node.
5160

5161
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5162
      to the node role (as _ROLE_*)
5163
  @cvar _R2F: a dictionary from node role to tuples of flags
5164
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5165

5166
  """
5167
  HPATH = "node-modify"
5168
  HTYPE = constants.HTYPE_NODE
5169
  REQ_BGL = False
5170
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5171
  _F2R = {
5172
    (True, False, False): _ROLE_CANDIDATE,
5173
    (False, True, False): _ROLE_DRAINED,
5174
    (False, False, True): _ROLE_OFFLINE,
5175
    (False, False, False): _ROLE_REGULAR,
5176
    }
5177
  _R2F = dict((v, k) for k, v in _F2R.items())
5178
  _FLAGS = ["master_candidate", "drained", "offline"]
5179

    
5180
  def CheckArguments(self):
5181
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5182
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5183
                self.op.master_capable, self.op.vm_capable,
5184
                self.op.secondary_ip, self.op.ndparams]
5185
    if all_mods.count(None) == len(all_mods):
5186
      raise errors.OpPrereqError("Please pass at least one modification",
5187
                                 errors.ECODE_INVAL)
5188
    if all_mods.count(True) > 1:
5189
      raise errors.OpPrereqError("Can't set the node into more than one"
5190
                                 " state at the same time",
5191
                                 errors.ECODE_INVAL)
5192

    
5193
    # Boolean value that tells us whether we might be demoting from MC
5194
    self.might_demote = (self.op.master_candidate == False or
5195
                         self.op.offline == True or
5196
                         self.op.drained == True or
5197
                         self.op.master_capable == False)
5198

    
5199
    if self.op.secondary_ip:
5200
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5201
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5202
                                   " address" % self.op.secondary_ip,
5203
                                   errors.ECODE_INVAL)
5204

    
5205
    self.lock_all = self.op.auto_promote and self.might_demote
5206
    self.lock_instances = self.op.secondary_ip is not None
5207

    
5208
  def ExpandNames(self):
5209
    if self.lock_all:
5210
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5211
    else:
5212
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5213

    
5214
    if self.lock_instances:
5215
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5216

    
5217
  def DeclareLocks(self, level):
5218
    # If we have locked all instances, before waiting to lock nodes, release
5219
    # all the ones living on nodes unrelated to the current operation.
5220
    if level == locking.LEVEL_NODE and self.lock_instances:
5221
      self.affected_instances = []
5222
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5223
        instances_keep = []
5224

    
5225
        # Build list of instances to release
5226
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5227
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5228
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5229
              self.op.node_name in instance.all_nodes):
5230
            instances_keep.append(instance_name)
5231
            self.affected_instances.append(instance)
5232

    
5233
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5234

    
5235
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5236
                set(instances_keep))
5237

    
5238
  def BuildHooksEnv(self):
5239
    """Build hooks env.
5240

5241
    This runs on the master node.
5242

5243
    """
5244
    return {
5245
      "OP_TARGET": self.op.node_name,
5246
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5247
      "OFFLINE": str(self.op.offline),
5248
      "DRAINED": str(self.op.drained),
5249
      "MASTER_CAPABLE": str(self.op.master_capable),
5250
      "VM_CAPABLE": str(self.op.vm_capable),
5251
      }
5252

    
5253
  def BuildHooksNodes(self):
5254
    """Build hooks nodes.
5255

5256
    """
5257
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5258
    return (nl, nl)
5259

    
5260
  def CheckPrereq(self):
5261
    """Check prerequisites.
5262

5263
    This only checks the instance list against the existing names.
5264

5265
    """
5266
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5267

    
5268
    if (self.op.master_candidate is not None or
5269
        self.op.drained is not None or
5270
        self.op.offline is not None):
5271
      # we can't change the master's node flags
5272
      if self.op.node_name == self.cfg.GetMasterNode():
5273
        raise errors.OpPrereqError("The master role can be changed"
5274
                                   " only via master-failover",
5275
                                   errors.ECODE_INVAL)
5276

    
5277
    if self.op.master_candidate and not node.master_capable:
5278
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5279
                                 " it a master candidate" % node.name,
5280
                                 errors.ECODE_STATE)
5281

    
5282
    if self.op.vm_capable == False:
5283
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5284
      if ipri or isec:
5285
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5286
                                   " the vm_capable flag" % node.name,
5287
                                   errors.ECODE_STATE)
5288

    
5289
    if node.master_candidate and self.might_demote and not self.lock_all:
5290
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5291
      # check if after removing the current node, we're missing master
5292
      # candidates
5293
      (mc_remaining, mc_should, _) = \
5294
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5295
      if mc_remaining < mc_should:
5296
        raise errors.OpPrereqError("Not enough master candidates, please"
5297
                                   " pass auto promote option to allow"
5298
                                   " promotion", errors.ECODE_STATE)
5299

    
5300
    self.old_flags = old_flags = (node.master_candidate,
5301
                                  node.drained, node.offline)
5302
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5303
    self.old_role = old_role = self._F2R[old_flags]
5304

    
5305
    # Check for ineffective changes
5306
    for attr in self._FLAGS:
5307
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5308
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5309
        setattr(self.op, attr, None)
5310

    
5311
    # Past this point, any flag change to False means a transition
5312
    # away from the respective state, as only real changes are kept
5313

    
5314
    # TODO: We might query the real power state if it supports OOB
5315
    if _SupportsOob(self.cfg, node):
5316
      if self.op.offline is False and not (node.powered or
5317
                                           self.op.powered == True):
5318
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5319
                                    " offline status can be reset") %
5320
                                   self.op.node_name)
5321
    elif self.op.powered is not None:
5322
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5323
                                  " as it does not support out-of-band"
5324
                                  " handling") % self.op.node_name)
5325

    
5326
    # If the node is being de-offlined or un-drained, promote it to
    # master candidate if needed
5327
    if (self.op.drained == False or self.op.offline == False or
5328
        (self.op.master_capable and not node.master_capable)):
5329
      if _DecideSelfPromotion(self):
5330
        self.op.master_candidate = True
5331
        self.LogInfo("Auto-promoting node to master candidate")
5332

    
5333
    # If we're no longer master capable, we'll demote ourselves from MC
5334
    if self.op.master_capable == False and node.master_candidate:
5335
      self.LogInfo("Demoting from master candidate")
5336
      self.op.master_candidate = False
5337

    
5338
    # Compute new role
5339
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5340
    if self.op.master_candidate:
5341
      new_role = self._ROLE_CANDIDATE
5342
    elif self.op.drained:
5343
      new_role = self._ROLE_DRAINED
5344
    elif self.op.offline:
5345
      new_role = self._ROLE_OFFLINE
5346
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5347
      # False is still in new flags, which means we're un-setting (the
5348
      # only) True flag
5349
      new_role = self._ROLE_REGULAR
5350
    else: # no new flags, nothing, keep old role
5351
      new_role = old_role
5352

    
5353
    self.new_role = new_role
5354

    
5355
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5356
      # Trying to transition out of offline status
5357
      result = self.rpc.call_version([node.name])[node.name]
5358
      if result.fail_msg:
5359
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5360
                                   " to report its version: %s" %
5361
                                   (node.name, result.fail_msg),
5362
                                   errors.ECODE_STATE)
5363
      else:
5364
        self.LogWarning("Transitioning node from offline to online state"
5365
                        " without using re-add. Please make sure the node"
5366
                        " is healthy!")
5367

    
5368
    if self.op.secondary_ip:
5369
      # Ok even without locking, because this can't be changed by any LU
5370
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5371
      master_singlehomed = master.secondary_ip == master.primary_ip
5372
      if master_singlehomed and self.op.secondary_ip:
5373
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5374
                                   " homed cluster", errors.ECODE_INVAL)
5375

    
5376
      if node.offline:
5377
        if self.affected_instances:
5378
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5379
                                     " node has instances (%s) configured"
5380
                                     " to use it" % self.affected_instances)
5381
      else:
5382
        # On online nodes, check that no instances are running, and that
5383
        # the node has the new ip and we can reach it.
5384
        for instance in self.affected_instances:
5385
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5386

    
5387
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5388
        if master.name != node.name:
5389
          # check reachability from master secondary ip to new secondary ip
5390
          if not netutils.TcpPing(self.op.secondary_ip,
5391
                                  constants.DEFAULT_NODED_PORT,
5392
                                  source=master.secondary_ip):
5393
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5394
                                       " based ping to node daemon port",
5395
                                       errors.ECODE_ENVIRON)
5396

    
5397
    if self.op.ndparams:
5398
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5399
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5400
      self.new_ndparams = new_ndparams
5401

    
5402
  def Exec(self, feedback_fn):
5403
    """Modifies a node.
5404

5405
    """
5406
    node = self.node
5407
    old_role = self.old_role
5408
    new_role = self.new_role
5409

    
5410
    result = []
5411

    
5412
    if self.op.ndparams:
5413
      node.ndparams = self.new_ndparams
5414

    
5415
    if self.op.powered is not None:
5416
      node.powered = self.op.powered
5417

    
5418
    for attr in ["master_capable", "vm_capable"]:
5419
      val = getattr(self.op, attr)
5420
      if val is not None:
5421
        setattr(node, attr, val)
5422
        result.append((attr, str(val)))
5423

    
5424
    if new_role != old_role:
5425
      # Tell the node to demote itself, if no longer MC and not offline
5426
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5427
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5428
        if msg:
5429
          self.LogWarning("Node failed to demote itself: %s", msg)
5430

    
5431
      new_flags = self._R2F[new_role]
5432
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5433
        if of != nf:
5434
          result.append((desc, str(nf)))
5435
      (node.master_candidate, node.drained, node.offline) = new_flags
5436

    
5437
      # we locked all nodes, so adjust the candidate pool before
      # updating this node
5438
      if self.lock_all:
5439
        _AdjustCandidatePool(self, [node.name])
5440

    
5441
    if self.op.secondary_ip:
5442
      node.secondary_ip = self.op.secondary_ip
5443
      result.append(("secondary_ip", self.op.secondary_ip))
5444

    
5445
    # this will trigger configuration file update, if needed
5446
    self.cfg.Update(node, feedback_fn)
5447

    
5448
    # this will trigger job queue propagation or cleanup if the mc
5449
    # flag changed
5450
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5451
      self.context.ReaddNode(node)
5452

    
5453
    return result
5454

    
5455

    
5456
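# Illustrative sketch only: the flag/role translation used by
# LUNodeSetParams.  The _F2R table above maps the
# (master_candidate, drained, offline) flag tuple to a role and _R2F is
# its inverse, so a role change can be applied back to the three node
# flags.  Hypothetical helper.
def _ExampleApplyNodeRole(role):
  """Returns the (master_candidate, drained, offline) flags for a role.

  """
  # pylint: disable=W0212
  return LUNodeSetParams._R2F[role]

# _ExampleApplyNodeRole(LUNodeSetParams._ROLE_DRAINED) -> (False, True, False)

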
class LUNodePowercycle(NoHooksLU):
5457
  """Powercycles a node.
5458

5459
  """
5460
  REQ_BGL = False
5461

    
5462
  def CheckArguments(self):
5463
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5464
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5465
      raise errors.OpPrereqError("The node is the master and the force"
5466
                                 " parameter was not set",
5467
                                 errors.ECODE_INVAL)
5468

    
5469
  def ExpandNames(self):
5470
    """Locking for PowercycleNode.
5471

5472
    This is a last-resort option and shouldn't block on other
5473
    jobs. Therefore, we grab no locks.
5474

5475
    """
5476
    self.needed_locks = {}
5477

    
5478
  def Exec(self, feedback_fn):
5479
    """Reboots a node.
5480

5481
    """
5482
    result = self.rpc.call_node_powercycle(self.op.node_name,
5483
                                           self.cfg.GetHypervisorType())
5484
    result.Raise("Failed to schedule the reboot")
5485
    return result.payload
5486

    
5487

    
5488
class LUClusterQuery(NoHooksLU):
5489
  """Query cluster configuration.
5490

5491
  """
5492
  REQ_BGL = False
5493

    
5494
  def ExpandNames(self):
5495
    self.needed_locks = {}
5496

    
5497
  def Exec(self, feedback_fn):
5498
    """Return cluster config.
5499

5500
    """
5501
    cluster = self.cfg.GetClusterInfo()
5502
    os_hvp = {}
5503

    
5504
    # Filter just for enabled hypervisors
5505
    for os_name, hv_dict in cluster.os_hvp.items():
5506
      os_hvp[os_name] = {}
5507
      for hv_name, hv_params in hv_dict.items():
5508
        if hv_name in cluster.enabled_hypervisors:
5509
          os_hvp[os_name][hv_name] = hv_params
5510

    
5511
    # Convert ip_family to ip_version
5512
    primary_ip_version = constants.IP4_VERSION
5513
    if cluster.primary_ip_family == netutils.IP6Address.family:
5514
      primary_ip_version = constants.IP6_VERSION
5515

    
5516
    result = {
5517
      "software_version": constants.RELEASE_VERSION,
5518
      "protocol_version": constants.PROTOCOL_VERSION,
5519
      "config_version": constants.CONFIG_VERSION,
5520
      "os_api_version": max(constants.OS_API_VERSIONS),
5521
      "export_version": constants.EXPORT_VERSION,
5522
      "architecture": (platform.architecture()[0], platform.machine()),
5523
      "name": cluster.cluster_name,
5524
      "master": cluster.master_node,
5525
      "default_hypervisor": cluster.enabled_hypervisors[0],
5526
      "enabled_hypervisors": cluster.enabled_hypervisors,
5527
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5528
                        for hypervisor_name in cluster.enabled_hypervisors]),
5529
      "os_hvp": os_hvp,
5530
      "beparams": cluster.beparams,
5531
      "osparams": cluster.osparams,
5532
      "nicparams": cluster.nicparams,
5533
      "ndparams": cluster.ndparams,
5534
      "candidate_pool_size": cluster.candidate_pool_size,
5535
      "master_netdev": cluster.master_netdev,
5536
      "master_netmask": cluster.master_netmask,
5537
      "volume_group_name": cluster.volume_group_name,
5538
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5539
      "file_storage_dir": cluster.file_storage_dir,
5540
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5541
      "maintain_node_health": cluster.maintain_node_health,
5542
      "ctime": cluster.ctime,
5543
      "mtime": cluster.mtime,
5544
      "uuid": cluster.uuid,
5545
      "tags": list(cluster.GetTags()),
5546
      "uid_pool": cluster.uid_pool,
5547
      "default_iallocator": cluster.default_iallocator,
5548
      "reserved_lvs": cluster.reserved_lvs,
5549
      "primary_ip_version": primary_ip_version,
5550
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5551
      "hidden_os": cluster.hidden_os,
5552
      "blacklisted_os": cluster.blacklisted_os,
5553
      }
5554

    
5555
    return result
5556

    
5557

    
5558
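# Illustrative sketch only: the hypervisor-parameter filtering done in
# LUClusterQuery.Exec, restated as a standalone function over plain
# dictionaries.  Hypothetical helper.
def _ExampleFilterOsHvp(os_hvp, enabled_hypervisors):
  """Keeps only the per-OS hypervisor parameters of enabled hypervisors.

  """
  return dict((os_name,
               dict((hv_name, hv_params)
                    for (hv_name, hv_params) in hv_dict.items()
                    if hv_name in enabled_hypervisors))
              for (os_name, hv_dict) in os_hvp.items())

# Example:
#   _ExampleFilterOsHvp({"lenny": {"xen-pvm": {}, "fake": {}}}, ["xen-pvm"])
#   -> {"lenny": {"xen-pvm": {}}}

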
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return the values of the requested configuration fields.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      operation failed on any non-ignored node, and device_info is a list
      of (host, instance_visible_name, node_visible_name) tuples with the
      mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


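# Editor's illustrative sketch (not part of the original Ganeti code): a
# minimal caller of _AssembleInstanceDisks, assuming "lu" is a LogicalUnit
# and "instance" an objects.Instance as documented above. The helper name
# _ExampleActivateAndReport is hypothetical and never invoked by this module.
def _ExampleActivateAndReport(lu, instance):
  """Assemble all disks of an instance and log the resulting device paths."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    # Mirrors what LUInstanceActivateDisks.Exec does on failure
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    logging.info("Disk %s of instance %s is visible on node %s as %s",
                 iv_name, instance.name, node, dev_path)
  return device_info

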
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are ignored only if C{ignore_primary} is
  true; errors on other nodes are ignored only when that node is marked
  offline in the RPC result.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


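# Editor's illustrative sketch (not part of the original code): how a caller
# might combine the helpers above, assuming "lu" is a LogicalUnit and
# "instance" an objects.Instance. The helper name _ExampleStopDisks is
# hypothetical and is not referenced anywhere else in this module.
def _ExampleStopDisks(lu, instance, force=False):
  """Deactivate an instance's disks, optionally skipping the safety check."""
  if force:
    # Unconditional shutdown; errors on the primary node still count as
    # failures because ignore_primary defaults to False
    return _ShutdownInstanceDisks(lu, instance)
  # _SafeShutdownInstanceDisks first verifies that the instance is down
  _SafeShutdownInstanceDisks(lu, instance)
  return True

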
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


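# Editor's illustrative sketch (not part of the original code): the typical
# CheckPrereq-time use of _CheckNodeFreeMemory, as done before starting an
# instance. "lu" and "instance" are assumed to be a LogicalUnit and an
# objects.Instance; the helper name itself is hypothetical.
def _ExampleCheckStartupMemory(lu, instance):
  """Verify the primary node can hold the instance's configured memory."""
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)

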
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


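# Editor's illustrative sketch (not part of the original code): building the
# per-VG requirement hash expected by _CheckNodesFreeDiskPerVG from a list of
# (vg, size_in_mib) pairs. The helper name and the "disk_sizes" argument are
# hypothetical.
def _ExampleCheckDiskSpace(lu, nodenames, disk_sizes):
  """Aggregate per-VG disk requirements and verify them on all nodes."""
  req_sizes = {}
  for vg, size in disk_sizes:
    # Sum the requested space per volume group, in MiB
    req_sizes[vg] = req_sizes.get(vg, 0) + size
  _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes)

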
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
5962
  """Starts an instance.
5963

5964
  """
5965
  HPATH = "instance-start"
5966
  HTYPE = constants.HTYPE_INSTANCE
5967
  REQ_BGL = False
5968

    
5969
  def CheckArguments(self):
5970
    # extra beparams
5971
    if self.op.beparams:
5972
      # fill the beparams dict
5973
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5974

    
5975
  def ExpandNames(self):
5976
    self._ExpandAndLockInstance()
5977

    
5978
  def BuildHooksEnv(self):
5979
    """Build hooks env.
5980

5981
    This runs on master, primary and secondary nodes of the instance.
5982

5983
    """
5984
    env = {
5985
      "FORCE": self.op.force,
5986
      }
5987

    
5988
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5989

    
5990
    return env
5991

    
5992
  def BuildHooksNodes(self):
5993
    """Build hooks nodes.
5994

5995
    """
5996
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5997
    return (nl, nl)
5998

    
5999
  def CheckPrereq(self):
6000
    """Check prerequisites.
6001

6002
    This checks that the instance is in the cluster.
6003

6004
    """
6005
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6006
    assert self.instance is not None, \
6007
      "Cannot retrieve locked instance %s" % self.op.instance_name
6008

    
6009
    # extra hvparams
6010
    if self.op.hvparams:
6011
      # check hypervisor parameter syntax (locally)
6012
      cluster = self.cfg.GetClusterInfo()
6013
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6014
      filled_hvp = cluster.FillHV(instance)
6015
      filled_hvp.update(self.op.hvparams)
6016
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6017
      hv_type.CheckParameterSyntax(filled_hvp)
6018
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6019

    
6020
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6021

    
6022
    if self.primary_offline and self.op.ignore_offline_nodes:
6023
      self.proc.LogWarning("Ignoring offline primary node")
6024

    
6025
      if self.op.hvparams or self.op.beparams:
6026
        self.proc.LogWarning("Overridden parameters are ignored")
6027
    else:
6028
      _CheckNodeOnline(self, instance.primary_node)
6029

    
6030
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6031

    
6032
      # check bridges existence
6033
      _CheckInstanceBridgesExist(self, instance)
6034

    
6035
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6036
                                                instance.name,
6037
                                                instance.hypervisor)
6038
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6039
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6040
      if not remote_info.payload: # not running already
6041
        _CheckNodeFreeMemory(self, instance.primary_node,
6042
                             "starting instance %s" % instance.name,
6043
                             bep[constants.BE_MEMORY], instance.hypervisor)
6044

    
6045
  def Exec(self, feedback_fn):
6046
    """Start the instance.
6047

6048
    """
6049
    instance = self.instance
6050
    force = self.op.force
6051

    
6052
    if not self.op.no_remember:
6053
      self.cfg.MarkInstanceUp(instance.name)
6054

    
6055
    if self.primary_offline:
6056
      assert self.op.ignore_offline_nodes
6057
      self.proc.LogInfo("Primary node offline, marked instance as started")
6058
    else:
6059
      node_current = instance.primary_node
6060

    
6061
      _StartInstanceDisks(self, instance, force)
6062

    
6063
      result = self.rpc.call_instance_start(node_current, instance,
6064
                                            self.op.hvparams, self.op.beparams,
6065
                                            self.op.startup_paused)
6066
      msg = result.fail_msg
6067
      if msg:
6068
        _ShutdownInstanceDisks(self, instance)
6069
        raise errors.OpExecError("Could not start instance: %s" % msg)
6070

    
6071

    
6072
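# Editor's illustrative sketch (not part of the original code): the local
# hvparams validation performed by LUInstanceStartup.CheckPrereq above,
# shown on its own. "lu" is assumed to be a LogicalUnit, "instance" an
# objects.Instance and "hvparams" a dict of overrides; the helper name is
# hypothetical.
def _ExampleCheckHvParamOverrides(lu, instance, hvparams):
  """Syntax-check hypervisor parameter overrides against the filled dict."""
  cluster = lu.cfg.GetClusterInfo()
  utils.ForceDictType(hvparams, constants.HVS_PARAMETER_TYPES)
  filled_hvp = cluster.FillHV(instance)
  filled_hvp.update(hvparams)
  hv_type = hypervisor.GetHypervisor(instance.hypervisor)
  hv_type.CheckParameterSyntax(filled_hvp)
  return filled_hvp

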
class LUInstanceReboot(LogicalUnit):
6073
  """Reboot an instance.
6074

6075
  """
6076
  HPATH = "instance-reboot"
6077
  HTYPE = constants.HTYPE_INSTANCE
6078
  REQ_BGL = False
6079

    
6080
  def ExpandNames(self):
6081
    self._ExpandAndLockInstance()
6082

    
6083
  def BuildHooksEnv(self):
6084
    """Build hooks env.
6085

6086
    This runs on master, primary and secondary nodes of the instance.
6087

6088
    """
6089
    env = {
6090
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6091
      "REBOOT_TYPE": self.op.reboot_type,
6092
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6093
      }
6094

    
6095
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6096

    
6097
    return env
6098

    
6099
  def BuildHooksNodes(self):
6100
    """Build hooks nodes.
6101

6102
    """
6103
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6104
    return (nl, nl)
6105

    
6106
  def CheckPrereq(self):
6107
    """Check prerequisites.
6108

6109
    This checks that the instance is in the cluster.
6110

6111
    """
6112
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6113
    assert self.instance is not None, \
6114
      "Cannot retrieve locked instance %s" % self.op.instance_name
6115

    
6116
    _CheckNodeOnline(self, instance.primary_node)
6117

    
6118
    # check bridges existence
6119
    _CheckInstanceBridgesExist(self, instance)
6120

    
6121
  def Exec(self, feedback_fn):
6122
    """Reboot the instance.
6123

6124
    """
6125
    instance = self.instance
6126
    ignore_secondaries = self.op.ignore_secondaries
6127
    reboot_type = self.op.reboot_type
6128

    
6129
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6130
                                              instance.name,
6131
                                              instance.hypervisor)
6132
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6133
    instance_running = bool(remote_info.payload)
6134

    
6135
    node_current = instance.primary_node
6136

    
6137
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6138
                                            constants.INSTANCE_REBOOT_HARD]:
6139
      for disk in instance.disks:
6140
        self.cfg.SetDiskID(disk, node_current)
6141
      result = self.rpc.call_instance_reboot(node_current, instance,
6142
                                             reboot_type,
6143
                                             self.op.shutdown_timeout)
6144
      result.Raise("Could not reboot instance")
6145
    else:
6146
      if instance_running:
6147
        result = self.rpc.call_instance_shutdown(node_current, instance,
6148
                                                 self.op.shutdown_timeout)
6149
        result.Raise("Could not shutdown instance for full reboot")
6150
        _ShutdownInstanceDisks(self, instance)
6151
      else:
6152
        self.LogInfo("Instance %s was already stopped, starting now",
6153
                     instance.name)
6154
      _StartInstanceDisks(self, instance, ignore_secondaries)
6155
      result = self.rpc.call_instance_start(node_current, instance,
6156
                                            None, None, False)
6157
      msg = result.fail_msg
6158
      if msg:
6159
        _ShutdownInstanceDisks(self, instance)
6160
        raise errors.OpExecError("Could not start instance for"
6161
                                 " full reboot: %s" % msg)
6162

    
6163
    self.cfg.MarkInstanceUp(instance.name)
6164

    
6165

    
6166
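# Editor's illustrative sketch (not part of the original code): the decision
# made in LUInstanceReboot.Exec above, factored out for clarity. A running
# instance with a soft or hard reboot type is rebooted in place via the node
# RPC; every other combination falls through to the stop-and-start path.
# The helper name is hypothetical.
def _ExampleRebootInPlace(instance_running, reboot_type):
  """Return True if the reboot can be delegated to the node daemon."""
  return (instance_running and
          reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                          constants.INSTANCE_REBOOT_HARD])

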
class LUInstanceShutdown(LogicalUnit):
6167
  """Shutdown an instance.
6168

6169
  """
6170
  HPATH = "instance-stop"
6171
  HTYPE = constants.HTYPE_INSTANCE
6172
  REQ_BGL = False
6173

    
6174
  def ExpandNames(self):
6175
    self._ExpandAndLockInstance()
6176

    
6177
  def BuildHooksEnv(self):
6178
    """Build hooks env.
6179

6180
    This runs on master, primary and secondary nodes of the instance.
6181

6182
    """
6183
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6184
    env["TIMEOUT"] = self.op.timeout
6185
    return env
6186

    
6187
  def BuildHooksNodes(self):
6188
    """Build hooks nodes.
6189

6190
    """
6191
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6192
    return (nl, nl)
6193

    
6194
  def CheckPrereq(self):
6195
    """Check prerequisites.
6196

6197
    This checks that the instance is in the cluster.
6198

6199
    """
6200
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6201
    assert self.instance is not None, \
6202
      "Cannot retrieve locked instance %s" % self.op.instance_name
6203

    
6204
    self.primary_offline = \
6205
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6206

    
6207
    if self.primary_offline and self.op.ignore_offline_nodes:
6208
      self.proc.LogWarning("Ignoring offline primary node")
6209
    else:
6210
      _CheckNodeOnline(self, self.instance.primary_node)
6211

    
6212
  def Exec(self, feedback_fn):
6213
    """Shutdown the instance.
6214

6215
    """
6216
    instance = self.instance
6217
    node_current = instance.primary_node
6218
    timeout = self.op.timeout
6219

    
6220
    if not self.op.no_remember:
6221
      self.cfg.MarkInstanceDown(instance.name)
6222

    
6223
    if self.primary_offline:
6224
      assert self.op.ignore_offline_nodes
6225
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6226
    else:
6227
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6228
      msg = result.fail_msg
6229
      if msg:
6230
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6231

    
6232
      _ShutdownInstanceDisks(self, instance)
6233

    
6234

    
6235
class LUInstanceReinstall(LogicalUnit):
6236
  """Reinstall an instance.
6237

6238
  """
6239
  HPATH = "instance-reinstall"
6240
  HTYPE = constants.HTYPE_INSTANCE
6241
  REQ_BGL = False
6242

    
6243
  def ExpandNames(self):
6244
    self._ExpandAndLockInstance()
6245

    
6246
  def BuildHooksEnv(self):
6247
    """Build hooks env.
6248

6249
    This runs on master, primary and secondary nodes of the instance.
6250

6251
    """
6252
    return _BuildInstanceHookEnvByObject(self, self.instance)
6253

    
6254
  def BuildHooksNodes(self):
6255
    """Build hooks nodes.
6256

6257
    """
6258
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6259
    return (nl, nl)
6260

    
6261
  def CheckPrereq(self):
6262
    """Check prerequisites.
6263

6264
    This checks that the instance is in the cluster and is not running.
6265

6266
    """
6267
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6268
    assert instance is not None, \
6269
      "Cannot retrieve locked instance %s" % self.op.instance_name
6270
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6271
                     " offline, cannot reinstall")
6272
    for node in instance.secondary_nodes:
6273
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6274
                       " cannot reinstall")
6275

    
6276
    if instance.disk_template == constants.DT_DISKLESS:
6277
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6278
                                 self.op.instance_name,
6279
                                 errors.ECODE_INVAL)
6280
    _CheckInstanceDown(self, instance, "cannot reinstall")
6281

    
6282
    if self.op.os_type is not None:
6283
      # OS verification
6284
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6285
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6286
      instance_os = self.op.os_type
6287
    else:
6288
      instance_os = instance.os
6289

    
6290
    nodelist = list(instance.all_nodes)
6291

    
6292
    if self.op.osparams:
6293
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6294
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6295
      self.os_inst = i_osdict # the new dict (without defaults)
6296
    else:
6297
      self.os_inst = None
6298

    
6299
    self.instance = instance
6300

    
6301
  def Exec(self, feedback_fn):
6302
    """Reinstall the instance.
6303

6304
    """
6305
    inst = self.instance
6306

    
6307
    if self.op.os_type is not None:
6308
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6309
      inst.os = self.op.os_type
6310
      # Write to configuration
6311
      self.cfg.Update(inst, feedback_fn)
6312

    
6313
    _StartInstanceDisks(self, inst, None)
6314
    try:
6315
      feedback_fn("Running the instance OS create scripts...")
6316
      # FIXME: pass debug option from opcode to backend
6317
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6318
                                             self.op.debug_level,
6319
                                             osparams=self.os_inst)
6320
      result.Raise("Could not install OS for instance %s on node %s" %
6321
                   (inst.name, inst.primary_node))
6322
    finally:
6323
      _ShutdownInstanceDisks(self, inst)
6324

    
6325

    
6326
class LUInstanceRecreateDisks(LogicalUnit):
6327
  """Recreate an instance's missing disks.
6328

6329
  """
6330
  HPATH = "instance-recreate-disks"
6331
  HTYPE = constants.HTYPE_INSTANCE
6332
  REQ_BGL = False
6333

    
6334
  def CheckArguments(self):
6335
    # normalise the disk list
6336
    self.op.disks = sorted(frozenset(self.op.disks))
6337

    
6338
  def ExpandNames(self):
6339
    self._ExpandAndLockInstance()
6340
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6341
    if self.op.nodes:
6342
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6343
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6344
    else:
6345
      self.needed_locks[locking.LEVEL_NODE] = []
6346

    
6347
  def DeclareLocks(self, level):
6348
    if level == locking.LEVEL_NODE:
6349
      # if we replace the nodes, we only need to lock the old primary,
6350
      # otherwise we need to lock all nodes for disk re-creation
6351
      primary_only = bool(self.op.nodes)
6352
      self._LockInstancesNodes(primary_only=primary_only)
6353

    
6354
  def BuildHooksEnv(self):
6355
    """Build hooks env.
6356

6357
    This runs on master, primary and secondary nodes of the instance.
6358

6359
    """
6360
    return _BuildInstanceHookEnvByObject(self, self.instance)
6361

    
6362
  def BuildHooksNodes(self):
6363
    """Build hooks nodes.
6364

6365
    """
6366
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6367
    return (nl, nl)
6368

    
6369
  def CheckPrereq(self):
6370
    """Check prerequisites.
6371

6372
    This checks that the instance is in the cluster and is not running.
6373

6374
    """
6375
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6376
    assert instance is not None, \
6377
      "Cannot retrieve locked instance %s" % self.op.instance_name
6378
    if self.op.nodes:
6379
      if len(self.op.nodes) != len(instance.all_nodes):
6380
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6381
                                   " %d replacement nodes were specified" %
6382
                                   (instance.name, len(instance.all_nodes),
6383
                                    len(self.op.nodes)),
6384
                                   errors.ECODE_INVAL)
6385
      assert instance.disk_template != constants.DT_DRBD8 or \
6386
          len(self.op.nodes) == 2
6387
      assert instance.disk_template != constants.DT_PLAIN or \
6388
          len(self.op.nodes) == 1
6389
      primary_node = self.op.nodes[0]
6390
    else:
6391
      primary_node = instance.primary_node
6392
    _CheckNodeOnline(self, primary_node)
6393

    
6394
    if instance.disk_template == constants.DT_DISKLESS:
6395
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6396
                                 self.op.instance_name, errors.ECODE_INVAL)
6397
    # if we replace nodes *and* the old primary is offline, we don't
6398
    # check
6399
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6400
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6401
    if not (self.op.nodes and old_pnode.offline):
6402
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6403

    
6404
    if not self.op.disks:
6405
      self.op.disks = range(len(instance.disks))
6406
    else:
6407
      for idx in self.op.disks:
6408
        if idx >= len(instance.disks):
6409
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6410
                                     errors.ECODE_INVAL)
6411
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6412
      raise errors.OpPrereqError("Can't recreate disks partially and"
6413
                                 " change the nodes at the same time",
6414
                                 errors.ECODE_INVAL)
6415
    self.instance = instance
6416

    
6417
  def Exec(self, feedback_fn):
6418
    """Recreate the disks.
6419

6420
    """
6421
    instance = self.instance
6422

    
6423
    to_skip = []
6424
    mods = [] # keeps track of needed logical_id changes
6425

    
6426
    for idx, disk in enumerate(instance.disks):
6427
      if idx not in self.op.disks: # disk idx has not been passed in
6428
        to_skip.append(idx)
6429
        continue
6430
      # update secondaries for disks, if needed
6431
      if self.op.nodes:
6432
        if disk.dev_type == constants.LD_DRBD8:
6433
          # need to update the nodes and minors
6434
          assert len(self.op.nodes) == 2
6435
          assert len(disk.logical_id) == 6 # otherwise disk internals
6436
                                           # have changed
6437
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6438
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6439
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6440
                    new_minors[0], new_minors[1], old_secret)
6441
          assert len(disk.logical_id) == len(new_id)
6442
          mods.append((idx, new_id))
6443

    
6444
    # now that we have passed all asserts above, we can apply the mods
6445
    # in a single run (to avoid partial changes)
6446
    for idx, new_id in mods:
6447
      instance.disks[idx].logical_id = new_id
6448

    
6449
    # change primary node, if needed
6450
    if self.op.nodes:
6451
      instance.primary_node = self.op.nodes[0]
6452
      self.LogWarning("Changing the instance's nodes, you will have to"
6453
                      " remove any disks left on the older nodes manually")
6454

    
6455
    if self.op.nodes:
6456
      self.cfg.Update(instance, feedback_fn)
6457

    
6458
    _CreateDisks(self, instance, to_skip=to_skip)
6459

    
6460

    
6461
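# Editor's illustrative sketch (not part of the original code): the shape of
# the DRBD8 logical_id manipulation done in LUInstanceRecreateDisks.Exec
# above. The 6-tuple layout (node_a, node_b, port, minor_a, minor_b, secret)
# is taken from the unpacking in that method; the helper name and arguments
# are hypothetical.
def _ExampleRebuildDrbdLogicalId(old_logical_id, new_nodes, new_minors):
  """Return a new DRBD logical_id keeping the old port and secret."""
  assert len(old_logical_id) == 6, "unexpected DRBD logical_id layout"
  (_, _, old_port, _, _, old_secret) = old_logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)

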
class LUInstanceRename(LogicalUnit):
6462
  """Rename an instance.
6463

6464
  """
6465
  HPATH = "instance-rename"
6466
  HTYPE = constants.HTYPE_INSTANCE
6467

    
6468
  def CheckArguments(self):
6469
    """Check arguments.
6470

6471
    """
6472
    if self.op.ip_check and not self.op.name_check:
6473
      # TODO: make the ip check more flexible and not depend on the name check
6474
      raise errors.OpPrereqError("IP address check requires a name check",
6475
                                 errors.ECODE_INVAL)
6476

    
6477
  def BuildHooksEnv(self):
6478
    """Build hooks env.
6479

6480
    This runs on master, primary and secondary nodes of the instance.
6481

6482
    """
6483
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6484
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6485
    return env
6486

    
6487
  def BuildHooksNodes(self):
6488
    """Build hooks nodes.
6489

6490
    """
6491
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6492
    return (nl, nl)
6493

    
6494
  def CheckPrereq(self):
6495
    """Check prerequisites.
6496

6497
    This checks that the instance is in the cluster and is not running.
6498

6499
    """
6500
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6501
                                                self.op.instance_name)
6502
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6503
    assert instance is not None
6504
    _CheckNodeOnline(self, instance.primary_node)
6505
    _CheckInstanceDown(self, instance, "cannot rename")
6506
    self.instance = instance
6507

    
6508
    new_name = self.op.new_name
6509
    if self.op.name_check:
6510
      hostname = netutils.GetHostname(name=new_name)
6511
      if hostname != new_name:
6512
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6513
                     hostname.name)
6514
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6515
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6516
                                    " same as given hostname '%s'") %
6517
                                    (hostname.name, self.op.new_name),
6518
                                    errors.ECODE_INVAL)
6519
      new_name = self.op.new_name = hostname.name
6520
      if (self.op.ip_check and
6521
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6522
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6523
                                   (hostname.ip, new_name),
6524
                                   errors.ECODE_NOTUNIQUE)
6525

    
6526
    instance_list = self.cfg.GetInstanceList()
6527
    if new_name in instance_list and new_name != instance.name:
6528
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6529
                                 new_name, errors.ECODE_EXISTS)
6530

    
6531
  def Exec(self, feedback_fn):
6532
    """Rename the instance.
6533

6534
    """
6535
    inst = self.instance
6536
    old_name = inst.name
6537

    
6538
    rename_file_storage = False
6539
    if (inst.disk_template in constants.DTS_FILEBASED and
6540
        self.op.new_name != inst.name):
6541
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6542
      rename_file_storage = True
6543

    
6544
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6545
    # Change the instance lock. This is definitely safe while we hold the BGL.
6546
    # Otherwise the new lock would have to be added in acquired mode.
6547
    assert self.REQ_BGL
6548
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6549
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6550

    
6551
    # re-read the instance from the configuration after rename
6552
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6553

    
6554
    if rename_file_storage:
6555
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6556
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6557
                                                     old_file_storage_dir,
6558
                                                     new_file_storage_dir)
6559
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6560
                   " (but the instance has been renamed in Ganeti)" %
6561
                   (inst.primary_node, old_file_storage_dir,
6562
                    new_file_storage_dir))
6563

    
6564
    _StartInstanceDisks(self, inst, None)
6565
    try:
6566
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6567
                                                 old_name, self.op.debug_level)
6568
      msg = result.fail_msg
6569
      if msg:
6570
        msg = ("Could not run OS rename script for instance %s on node %s"
6571
               " (but the instance has been renamed in Ganeti): %s" %
6572
               (inst.name, inst.primary_node, msg))
6573
        self.proc.LogWarning(msg)
6574
    finally:
6575
      _ShutdownInstanceDisks(self, inst)
6576

    
6577
    return inst.name
6578

    
6579

    
6580
class LUInstanceRemove(LogicalUnit):
6581
  """Remove an instance.
6582

6583
  """
6584
  HPATH = "instance-remove"
6585
  HTYPE = constants.HTYPE_INSTANCE
6586
  REQ_BGL = False
6587

    
6588
  def ExpandNames(self):
6589
    self._ExpandAndLockInstance()
6590
    self.needed_locks[locking.LEVEL_NODE] = []
6591
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6592

    
6593
  def DeclareLocks(self, level):
6594
    if level == locking.LEVEL_NODE:
6595
      self._LockInstancesNodes()
6596

    
6597
  def BuildHooksEnv(self):
6598
    """Build hooks env.
6599

6600
    This runs on master, primary and secondary nodes of the instance.
6601

6602
    """
6603
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6604
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6605
    return env
6606

    
6607
  def BuildHooksNodes(self):
6608
    """Build hooks nodes.
6609

6610
    """
6611
    nl = [self.cfg.GetMasterNode()]
6612
    nl_post = list(self.instance.all_nodes) + nl
6613
    return (nl, nl_post)
6614

    
6615
  def CheckPrereq(self):
6616
    """Check prerequisites.
6617

6618
    This checks that the instance is in the cluster.
6619

6620
    """
6621
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6622
    assert self.instance is not None, \
6623
      "Cannot retrieve locked instance %s" % self.op.instance_name
6624

    
6625
  def Exec(self, feedback_fn):
6626
    """Remove the instance.
6627

6628
    """
6629
    instance = self.instance
6630
    logging.info("Shutting down instance %s on node %s",
6631
                 instance.name, instance.primary_node)
6632

    
6633
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6634
                                             self.op.shutdown_timeout)
6635
    msg = result.fail_msg
6636
    if msg:
6637
      if self.op.ignore_failures:
6638
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6639
      else:
6640
        raise errors.OpExecError("Could not shutdown instance %s on"
6641
                                 " node %s: %s" %
6642
                                 (instance.name, instance.primary_node, msg))
6643

    
6644
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6645

    
6646

    
6647
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6648
  """Utility function to remove an instance.
6649

6650
  """
6651
  logging.info("Removing block devices for instance %s", instance.name)
6652

    
6653
  if not _RemoveDisks(lu, instance):
6654
    if not ignore_failures:
6655
      raise errors.OpExecError("Can't remove instance's disks")
6656
    feedback_fn("Warning: can't remove instance's disks")
6657

    
6658
  logging.info("Removing instance %s out of cluster config", instance.name)
6659

    
6660
  lu.cfg.RemoveInstance(instance.name)
6661

    
6662
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6663
    "Instance lock removal conflict"
6664

    
6665
  # Remove lock for the instance
6666
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6667

    
6668

    
6669
class LUInstanceQuery(NoHooksLU):
6670
  """Logical unit for querying instances.
6671

6672
  """
6673
  # pylint: disable=W0142
6674
  REQ_BGL = False
6675

    
6676
  def CheckArguments(self):
6677
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6678
                             self.op.output_fields, self.op.use_locking)
6679

    
6680
  def ExpandNames(self):
6681
    self.iq.ExpandNames(self)
6682

    
6683
  def DeclareLocks(self, level):
6684
    self.iq.DeclareLocks(self, level)
6685

    
6686
  def Exec(self, feedback_fn):
6687
    return self.iq.OldStyleQuery(self)
6688

    
6689

    
6690
class LUInstanceFailover(LogicalUnit):
6691
  """Failover an instance.
6692

6693
  """
6694
  HPATH = "instance-failover"
6695
  HTYPE = constants.HTYPE_INSTANCE
6696
  REQ_BGL = False
6697

    
6698
  def CheckArguments(self):
6699
    """Check the arguments.
6700

6701
    """
6702
    self.iallocator = getattr(self.op, "iallocator", None)
6703
    self.target_node = getattr(self.op, "target_node", None)
6704

    
6705
  def ExpandNames(self):
6706
    self._ExpandAndLockInstance()
6707

    
6708
    if self.op.target_node is not None:
6709
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6710

    
6711
    self.needed_locks[locking.LEVEL_NODE] = []
6712
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6713

    
6714
    ignore_consistency = self.op.ignore_consistency
6715
    shutdown_timeout = self.op.shutdown_timeout
6716
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6717
                                       cleanup=False,
6718
                                       failover=True,
6719
                                       ignore_consistency=ignore_consistency,
6720
                                       shutdown_timeout=shutdown_timeout)
6721
    self.tasklets = [self._migrater]
6722

    
6723
  def DeclareLocks(self, level):
6724
    if level == locking.LEVEL_NODE:
6725
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6726
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6727
        if self.op.target_node is None:
6728
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6729
        else:
6730
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6731
                                                   self.op.target_node]
6732
        del self.recalculate_locks[locking.LEVEL_NODE]
6733
      else:
6734
        self._LockInstancesNodes()
6735

    
6736
  def BuildHooksEnv(self):
6737
    """Build hooks env.
6738

6739
    This runs on master, primary and secondary nodes of the instance.
6740

6741
    """
6742
    instance = self._migrater.instance
6743
    source_node = instance.primary_node
6744
    target_node = self.op.target_node
6745
    env = {
6746
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6747
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6748
      "OLD_PRIMARY": source_node,
6749
      "NEW_PRIMARY": target_node,
6750
      }
6751

    
6752
    if instance.disk_template in constants.DTS_INT_MIRROR:
6753
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6754
      env["NEW_SECONDARY"] = source_node
6755
    else:
6756
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6757

    
6758
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6759

    
6760
    return env
6761

    
6762
  def BuildHooksNodes(self):
6763
    """Build hooks nodes.
6764

6765
    """
6766
    instance = self._migrater.instance
6767
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6768
    return (nl, nl + [instance.primary_node])
6769

    
6770

    
6771
class LUInstanceMigrate(LogicalUnit):
6772
  """Migrate an instance.
6773

6774
  This is migration without shutting down, compared to the failover,
6775
  which is done with shutdown.
6776

6777
  """
6778
  HPATH = "instance-migrate"
6779
  HTYPE = constants.HTYPE_INSTANCE
6780
  REQ_BGL = False
6781

    
6782
  def ExpandNames(self):
6783
    self._ExpandAndLockInstance()
6784

    
6785
    if self.op.target_node is not None:
6786
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6787

    
6788
    self.needed_locks[locking.LEVEL_NODE] = []
6789
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6790

    
6791
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6792
                                       cleanup=self.op.cleanup,
6793
                                       failover=False,
6794
                                       fallback=self.op.allow_failover)
6795
    self.tasklets = [self._migrater]
6796

    
6797
  def DeclareLocks(self, level):
6798
    if level == locking.LEVEL_NODE:
6799
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6800
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6801
        if self.op.target_node is None:
6802
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6803
        else:
6804
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6805
                                                   self.op.target_node]
6806
        del self.recalculate_locks[locking.LEVEL_NODE]
6807
      else:
6808
        self._LockInstancesNodes()
6809

    
6810
  def BuildHooksEnv(self):
6811
    """Build hooks env.
6812

6813
    This runs on master, primary and secondary nodes of the instance.
6814

6815
    """
6816
    instance = self._migrater.instance
6817
    source_node = instance.primary_node
6818
    target_node = self.op.target_node
6819
    env = _BuildInstanceHookEnvByObject(self, instance)
6820
    env.update({
6821
      "MIGRATE_LIVE": self._migrater.live,
6822
      "MIGRATE_CLEANUP": self.op.cleanup,
6823
      "OLD_PRIMARY": source_node,
6824
      "NEW_PRIMARY": target_node,
6825
      })
6826

    
6827
    if instance.disk_template in constants.DTS_INT_MIRROR:
6828
      env["OLD_SECONDARY"] = target_node
6829
      env["NEW_SECONDARY"] = source_node
6830
    else:
6831
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6832

    
6833
    return env
6834

    
6835
  def BuildHooksNodes(self):
6836
    """Build hooks nodes.
6837

6838
    """
6839
    instance = self._migrater.instance
6840
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6841
    return (nl, nl + [instance.primary_node])
6842

    
6843

    
6844
class LUInstanceMove(LogicalUnit):
6845
  """Move an instance by data-copying.
6846

6847
  """
6848
  HPATH = "instance-move"
6849
  HTYPE = constants.HTYPE_INSTANCE
6850
  REQ_BGL = False
6851

    
6852
  def ExpandNames(self):
6853
    self._ExpandAndLockInstance()
6854
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6855
    self.op.target_node = target_node
6856
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6857
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6858

    
6859
  def DeclareLocks(self, level):
6860
    if level == locking.LEVEL_NODE:
6861
      self._LockInstancesNodes(primary_only=True)
6862

    
6863
  def BuildHooksEnv(self):
6864
    """Build hooks env.
6865

6866
    This runs on master, primary and secondary nodes of the instance.
6867

6868
    """
6869
    env = {
6870
      "TARGET_NODE": self.op.target_node,
6871
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6872
      }
6873
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6874
    return env
6875

    
6876
  def BuildHooksNodes(self):
6877
    """Build hooks nodes.
6878

6879
    """
6880
    nl = [
6881
      self.cfg.GetMasterNode(),
6882
      self.instance.primary_node,
6883
      self.op.target_node,
6884
      ]
6885
    return (nl, nl)
6886

    
6887
  def CheckPrereq(self):
6888
    """Check prerequisites.
6889

6890
    This checks that the instance is in the cluster.
6891

6892
    """
6893
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6894
    assert self.instance is not None, \
6895
      "Cannot retrieve locked instance %s" % self.op.instance_name
6896

    
6897
    node = self.cfg.GetNodeInfo(self.op.target_node)
6898
    assert node is not None, \
6899
      "Cannot retrieve locked node %s" % self.op.target_node
6900

    
6901
    self.target_node = target_node = node.name
6902

    
6903
    if target_node == instance.primary_node:
6904
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6905
                                 (instance.name, target_node),
6906
                                 errors.ECODE_STATE)
6907

    
6908
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6909

    
6910
    for idx, dsk in enumerate(instance.disks):
6911
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6912
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6913
                                   " cannot copy" % idx, errors.ECODE_STATE)
6914

    
6915
    _CheckNodeOnline(self, target_node)
6916
    _CheckNodeNotDrained(self, target_node)
6917
    _CheckNodeVmCapable(self, target_node)
6918

    
6919
    if instance.admin_up:
6920
      # check memory requirements on the target node
6921
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6922
                           instance.name, bep[constants.BE_MEMORY],
6923
                           instance.hypervisor)
6924
    else:
6925
      self.LogInfo("Not checking memory on the secondary node as"
6926
                   " instance will not be started")
6927

    
6928
    # check bridge existence
6929
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6930

    
6931
  def Exec(self, feedback_fn):
6932
    """Move an instance.
6933

6934
    The move is done by shutting it down on its present node, copying
6935
    the data over (slow) and starting it on the new node.
6936

6937
    """
6938
    instance = self.instance
6939

    
6940
    source_node = instance.primary_node
6941
    target_node = self.target_node
6942

    
6943
    self.LogInfo("Shutting down instance %s on source node %s",
6944
                 instance.name, source_node)
6945

    
6946
    result = self.rpc.call_instance_shutdown(source_node, instance,
6947
                                             self.op.shutdown_timeout)
6948
    msg = result.fail_msg
6949
    if msg:
6950
      if self.op.ignore_consistency:
6951
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6952
                             " Proceeding anyway. Please make sure node"
6953
                             " %s is down. Error details: %s",
6954
                             instance.name, source_node, source_node, msg)
6955
      else:
6956
        raise errors.OpExecError("Could not shutdown instance %s on"
6957
                                 " node %s: %s" %
6958
                                 (instance.name, source_node, msg))
6959

    
6960
    # create the target disks
6961
    try:
6962
      _CreateDisks(self, instance, target_node=target_node)
6963
    except errors.OpExecError:
6964
      self.LogWarning("Device creation failed, reverting...")
6965
      try:
6966
        _RemoveDisks(self, instance, target_node=target_node)
6967
      finally:
6968
        self.cfg.ReleaseDRBDMinors(instance.name)
6969
        raise
6970

    
6971
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6972

    
6973
    errs = []
6974
    # activate, get path, copy the data over
6975
    for idx, disk in enumerate(instance.disks):
6976
      self.LogInfo("Copying data for disk %d", idx)
6977
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6978
                                               instance.name, True, idx)
6979
      if result.fail_msg:
6980
        self.LogWarning("Can't assemble newly created disk %d: %s",
6981
                        idx, result.fail_msg)
6982
        errs.append(result.fail_msg)
6983
        break
6984
      dev_path = result.payload
6985
      result = self.rpc.call_blockdev_export(source_node, disk,
6986
                                             target_node, dev_path,
6987
                                             cluster_name)
6988
      if result.fail_msg:
6989
        self.LogWarning("Can't copy data over for disk %d: %s",
6990
                        idx, result.fail_msg)
6991
        errs.append(result.fail_msg)
6992
        break
6993

    
6994
    if errs:
6995
      self.LogWarning("Some disks failed to copy, aborting")
6996
      try:
6997
        _RemoveDisks(self, instance, target_node=target_node)
6998
      finally:
6999
        self.cfg.ReleaseDRBDMinors(instance.name)
7000
        raise errors.OpExecError("Errors during disk copy: %s" %
7001
                                 (",".join(errs),))
7002

    
7003
    instance.primary_node = target_node
7004
    self.cfg.Update(instance, feedback_fn)
7005

    
7006
    self.LogInfo("Removing the disks on the original node")
7007
    _RemoveDisks(self, instance, target_node=source_node)
7008

    
7009
    # Only start the instance if it's marked as up
7010
    if instance.admin_up:
7011
      self.LogInfo("Starting instance %s on node %s",
7012
                   instance.name, target_node)
7013

    
7014
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7015
                                           ignore_secondaries=True)
7016
      if not disks_ok:
7017
        _ShutdownInstanceDisks(self, instance)
7018
        raise errors.OpExecError("Can't activate the instance's disks")
7019

    
7020
      result = self.rpc.call_instance_start(target_node, instance,
7021
                                            None, None, False)
7022
      msg = result.fail_msg
7023
      if msg:
7024
        _ShutdownInstanceDisks(self, instance)
7025
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7026
                                 (instance.name, target_node, msg))
7027

    
7028

    
7029
class LUNodeMigrate(LogicalUnit):
7030
  """Migrate all instances from a node.
7031

7032
  """
7033
  HPATH = "node-migrate"
7034
  HTYPE = constants.HTYPE_NODE
7035
  REQ_BGL = False
7036

    
7037
  def CheckArguments(self):
7038
    pass
7039

    
7040
  def ExpandNames(self):
7041
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7042

    
7043
    self.share_locks = _ShareAll()
7044
    self.needed_locks = {
7045
      locking.LEVEL_NODE: [self.op.node_name],
7046
      }
7047

    
7048
  def BuildHooksEnv(self):
7049
    """Build hooks env.
7050

7051
    This runs on the master, the primary and all the secondaries.
7052

7053
    """
7054
    return {
7055
      "NODE_NAME": self.op.node_name,
7056
      }
7057

    
7058
  def BuildHooksNodes(self):
7059
    """Build hooks nodes.
7060

7061
    """
7062
    nl = [self.cfg.GetMasterNode()]
7063
    return (nl, nl)
7064

    
7065
  def CheckPrereq(self):
7066
    pass
7067

    
7068
  def Exec(self, feedback_fn):
7069
    # Prepare jobs for migration instances
7070
    jobs = [
7071
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7072
                                 mode=self.op.mode,
7073
                                 live=self.op.live,
7074
                                 iallocator=self.op.iallocator,
7075
                                 target_node=self.op.target_node)]
7076
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7077
      ]
7078

    
7079
    # TODO: Run iallocator in this opcode and pass correct placement options to
7080
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7081
    # running the iallocator and the actual migration, a good consistency model
7082
    # will have to be found.
7083

    
7084
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7085
            frozenset([self.op.node_name]))
7086

    
7087
    return ResultWithJobs(jobs)
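

# Illustrative sketch, not part of the original module: the ResultWithJobs
# value returned by LUNodeMigrate.Exec above wraps one single-opcode job per
# primary instance of the evacuated node.  The hypothetical helper below
# builds the same structure from a plain list of instance names, leaving all
# other OpInstanceMigrate parameters at their opcode defaults.
def _ExampleNodeMigrateJobs(instance_names):
  """Build one single-opcode migration job per instance name.

  """
  return ResultWithJobs([[opcodes.OpInstanceMigrate(instance_name=name)]
                         for name in instance_names])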
7088

    
7089

    
7090
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between
                            source and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout to use for the
                          instance shutdown

  """
7114

    
7115
  # Constants
7116
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7117
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7118

    
7119
  def __init__(self, lu, instance_name, cleanup=False,
7120
               failover=False, fallback=False,
7121
               ignore_consistency=False,
7122
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7123
    """Initializes this class.
7124

7125
    """
7126
    Tasklet.__init__(self, lu)
7127

    
7128
    # Parameters
7129
    self.instance_name = instance_name
7130
    self.cleanup = cleanup
7131
    self.live = False # will be overridden later
7132
    self.failover = failover
7133
    self.fallback = fallback
7134
    self.ignore_consistency = ignore_consistency
7135
    self.shutdown_timeout = shutdown_timeout
7136

    
7137
  def CheckPrereq(self):
7138
    """Check prerequisites.
7139

7140
    This checks that the instance is in the cluster.
7141

7142
    """
7143
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7144
    instance = self.cfg.GetInstanceInfo(instance_name)
7145
    assert instance is not None
7146
    self.instance = instance
7147

    
7148
    if (not self.cleanup and not instance.admin_up and not self.failover and
7149
        self.fallback):
7150
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7151
                      " to failover")
7152
      self.failover = True
7153

    
7154
    if instance.disk_template not in constants.DTS_MIRRORED:
7155
      if self.failover:
7156
        text = "failovers"
7157
      else:
7158
        text = "migrations"
7159
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7160
                                 " %s" % (instance.disk_template, text),
7161
                                 errors.ECODE_STATE)
7162

    
7163
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7164
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7165

    
7166
      if self.lu.op.iallocator:
7167
        self._RunAllocator()
7168
      else:
7169
        # We set self.target_node as it is required by
7170
        # BuildHooksEnv
7171
        self.target_node = self.lu.op.target_node
7172

    
7173
      # self.target_node is already populated, either directly or by the
7174
      # iallocator run
7175
      target_node = self.target_node
7176
      if self.target_node == instance.primary_node:
7177
        raise errors.OpPrereqError("Cannot migrate instance %s"
7178
                                   " to its primary (%s)" %
7179
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_INVAL)
7180

    
7181
      if len(self.lu.tasklets) == 1:
7182
        # It is safe to release locks only when we're the only tasklet
7183
        # in the LU
7184
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7185
                      keep=[instance.primary_node, self.target_node])
7186

    
7187
    else:
7188
      secondary_nodes = instance.secondary_nodes
7189
      if not secondary_nodes:
7190
        raise errors.ConfigurationError("No secondary node but using"
7191
                                        " %s disk template" %
7192
                                        instance.disk_template)
7193
      target_node = secondary_nodes[0]
7194
      if self.lu.op.iallocator or (self.lu.op.target_node and
7195
                                   self.lu.op.target_node != target_node):
7196
        if self.failover:
7197
          text = "failed over"
7198
        else:
7199
          text = "migrated"
7200
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7201
                                   " be %s to arbitrary nodes"
7202
                                   " (neither an iallocator nor a target"
7203
                                   " node can be passed)" %
7204
                                   (instance.disk_template, text),
7205
                                   errors.ECODE_INVAL)
7206

    
7207
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7208

    
7209
    # check memory requirements on the secondary node
7210
    if not self.failover or instance.admin_up:
7211
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7212
                           instance.name, i_be[constants.BE_MEMORY],
7213
                           instance.hypervisor)
7214
    else:
7215
      self.lu.LogInfo("Not checking memory on the secondary node as"
7216
                      " instance will not be started")
7217

    
7218
    # check bridge existence
7219
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7220

    
7221
    if not self.cleanup:
7222
      _CheckNodeNotDrained(self.lu, target_node)
7223
      if not self.failover:
7224
        result = self.rpc.call_instance_migratable(instance.primary_node,
7225
                                                   instance)
7226
        if result.fail_msg and self.fallback:
7227
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7228
                          " failover")
7229
          self.failover = True
7230
        else:
7231
          result.Raise("Can't migrate, please use failover",
7232
                       prereq=True, ecode=errors.ECODE_STATE)
7233

    
7234
    assert not (self.failover and self.cleanup)
7235

    
7236
    if not self.failover:
7237
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7238
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7239
                                   " parameters are accepted",
7240
                                   errors.ECODE_INVAL)
7241
      if self.lu.op.live is not None:
7242
        if self.lu.op.live:
7243
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7244
        else:
7245
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7246
        # reset the 'live' parameter to None so that repeated
7247
        # invocations of CheckPrereq do not raise an exception
7248
        self.lu.op.live = None
7249
      elif self.lu.op.mode is None:
7250
        # read the default value from the hypervisor
7251
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7252
                                                skip_globals=False)
7253
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7254

    
7255
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7256
    else:
7257
      # Failover is never live
7258
      self.live = False
7259

    
7260
  def _RunAllocator(self):
7261
    """Run the allocator based on input opcode.
7262

7263
    """
7264
    ial = IAllocator(self.cfg, self.rpc,
7265
                     mode=constants.IALLOCATOR_MODE_RELOC,
7266
                     name=self.instance_name,
7267
                     # TODO See why hail breaks with a single node below
7268
                     relocate_from=[self.instance.primary_node,
7269
                                    self.instance.primary_node],
7270
                     )
7271

    
7272
    ial.Run(self.lu.op.iallocator)
7273

    
7274
    if not ial.success:
7275
      raise errors.OpPrereqError("Can't compute nodes using"
7276
                                 " iallocator '%s': %s" %
7277
                                 (self.lu.op.iallocator, ial.info),
7278
                                 errors.ECODE_NORES)
7279
    if len(ial.result) != ial.required_nodes:
7280
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7281
                                 " of nodes (%s), required %s" %
7282
                                 (self.lu.op.iallocator, len(ial.result),
7283
                                  ial.required_nodes), errors.ECODE_FAULT)
7284
    self.target_node = ial.result[0]
7285
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7286
                 self.instance_name, self.lu.op.iallocator,
7287
                 utils.CommaJoin(ial.result))
7288

    
7289
  def _WaitUntilSync(self):
7290
    """Poll with custom rpc for disk sync.
7291

7292
    This uses our own step-based rpc call.
7293

7294
    """
7295
    self.feedback_fn("* wait until resync is done")
7296
    all_done = False
7297
    while not all_done:
7298
      all_done = True
7299
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7300
                                            self.nodes_ip,
7301
                                            self.instance.disks)
7302
      min_percent = 100
7303
      for node, nres in result.items():
7304
        nres.Raise("Cannot resync disks on node %s" % node)
7305
        node_done, node_percent = nres.payload
7306
        all_done = all_done and node_done
7307
        if node_percent is not None:
7308
          min_percent = min(min_percent, node_percent)
7309
      if not all_done:
7310
        if min_percent < 100:
7311
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7312
        time.sleep(2)
7313

    
7314
  def _EnsureSecondary(self, node):
7315
    """Demote a node to secondary.
7316

7317
    """
7318
    self.feedback_fn("* switching node %s to secondary mode" % node)
7319

    
7320
    for dev in self.instance.disks:
7321
      self.cfg.SetDiskID(dev, node)
7322

    
7323
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7324
                                          self.instance.disks)
7325
    result.Raise("Cannot change disk to secondary on node %s" % node)
7326

    
7327
  def _GoStandalone(self):
7328
    """Disconnect from the network.
7329

7330
    """
7331
    self.feedback_fn("* changing into standalone mode")
7332
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7333
                                               self.instance.disks)
7334
    for node, nres in result.items():
7335
      nres.Raise("Cannot disconnect disks node %s" % node)
7336

    
7337
  def _GoReconnect(self, multimaster):
7338
    """Reconnect to the network.
7339

7340
    """
7341
    if multimaster:
7342
      msg = "dual-master"
7343
    else:
7344
      msg = "single-master"
7345
    self.feedback_fn("* changing disks into %s mode" % msg)
7346
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7347
                                           self.instance.disks,
7348
                                           self.instance.name, multimaster)
7349
    for node, nres in result.items():
7350
      nres.Raise("Cannot change disks config on node %s" % node)
7351

    
7352
  def _ExecCleanup(self):
7353
    """Try to cleanup after a failed migration.
7354

7355
    The cleanup is done by:
7356
      - check that the instance is running only on one node
7357
        (and update the config if needed)
7358
      - change disks on its secondary node to secondary
7359
      - wait until disks are fully synchronized
7360
      - disconnect from the network
7361
      - change disks into single-master mode
7362
      - wait again until disks are fully synchronized
7363

7364
    """
7365
    instance = self.instance
7366
    target_node = self.target_node
7367
    source_node = self.source_node
7368

    
7369
    # check running on only one node
7370
    self.feedback_fn("* checking where the instance actually runs"
7371
                     " (if this hangs, the hypervisor might be in"
7372
                     " a bad state)")
7373
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7374
    for node, result in ins_l.items():
7375
      result.Raise("Can't contact node %s" % node)
7376

    
7377
    runningon_source = instance.name in ins_l[source_node].payload
7378
    runningon_target = instance.name in ins_l[target_node].payload
7379

    
7380
    if runningon_source and runningon_target:
7381
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7382
                               " or the hypervisor is confused; you will have"
7383
                               " to ensure manually that it runs only on one"
7384
                               " and restart this operation")
7385

    
7386
    if not (runningon_source or runningon_target):
7387
      raise errors.OpExecError("Instance does not seem to be running at all;"
7388
                               " in this case it's safer to repair by"
7389
                               " running 'gnt-instance stop' to ensure disk"
7390
                               " shutdown, and then restarting it")
7391

    
7392
    if runningon_target:
7393
      # the migration has actually succeeded, we need to update the config
7394
      self.feedback_fn("* instance running on secondary node (%s),"
7395
                       " updating config" % target_node)
7396
      instance.primary_node = target_node
7397
      self.cfg.Update(instance, self.feedback_fn)
7398
      demoted_node = source_node
7399
    else:
7400
      self.feedback_fn("* instance confirmed to be running on its"
7401
                       " primary node (%s)" % source_node)
7402
      demoted_node = target_node
7403

    
7404
    if instance.disk_template in constants.DTS_INT_MIRROR:
7405
      self._EnsureSecondary(demoted_node)
7406
      try:
7407
        self._WaitUntilSync()
7408
      except errors.OpExecError:
7409
        # we ignore here errors, since if the device is standalone, it
7410
        # won't be able to sync
7411
        pass
7412
      self._GoStandalone()
7413
      self._GoReconnect(False)
7414
      self._WaitUntilSync()
7415

    
7416
    self.feedback_fn("* done")
7417

    
7418
  def _RevertDiskStatus(self):
7419
    """Try to revert the disk status after a failed migration.
7420

7421
    """
7422
    target_node = self.target_node
7423
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7424
      return
7425

    
7426
    try:
7427
      self._EnsureSecondary(target_node)
7428
      self._GoStandalone()
7429
      self._GoReconnect(False)
7430
      self._WaitUntilSync()
7431
    except errors.OpExecError, err:
7432
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7433
                         " please try to recover the instance manually;"
7434
                         " error '%s'" % str(err))
7435

    
7436
  def _AbortMigration(self):
7437
    """Call the hypervisor code to abort a started migration.
7438

7439
    """
7440
    instance = self.instance
7441
    target_node = self.target_node
7442
    source_node = self.source_node
7443
    migration_info = self.migration_info
7444

    
7445
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7446
                                                                 instance,
7447
                                                                 migration_info,
7448
                                                                 False)
7449
    abort_msg = abort_result.fail_msg
7450
    if abort_msg:
7451
      logging.error("Aborting migration failed on target node %s: %s",
7452
                    target_node, abort_msg)
7453
      # Don't raise an exception here, as we still have to try to revert the
7454
      # disk status, even if this step failed.
7455

    
7456
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7457
        instance, False, self.live)
7458
    abort_msg = abort_result.fail_msg
7459
    if abort_msg:
7460
      logging.error("Aborting migration failed on source node %s: %s",
7461
                    source_node, abort_msg)
7462

    
7463
  def _ExecMigration(self):
7464
    """Migrate an instance.
7465

7466
    The migrate is done by:
7467
      - change the disks into dual-master mode
7468
      - wait until disks are fully synchronized again
7469
      - migrate the instance
7470
      - change disks on the new secondary node (the old primary) to secondary
7471
      - wait until disks are fully synchronized
7472
      - change disks into single-master mode
7473

7474
    """
7475
    instance = self.instance
7476
    target_node = self.target_node
7477
    source_node = self.source_node
7478

    
7479
    # Check for hypervisor version mismatch and warn the user.
7480
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7481
                                       None, self.instance.hypervisor)
7482
    src_info = nodeinfo[source_node]
7483
    dst_info = nodeinfo[target_node]
7484

    
7485
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7486
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7487
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7488
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7489
      if src_version != dst_version:
7490
        self.feedback_fn("* warning: hypervisor version mismatch between"
7491
                         " source (%s) and target (%s) node" %
7492
                         (src_version, dst_version))
7493

    
7494
    self.feedback_fn("* checking disk consistency between source and target")
7495
    for dev in instance.disks:
7496
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7497
        raise errors.OpExecError("Disk %s is degraded or not fully"
7498
                                 " synchronized on target node,"
7499
                                 " aborting migration" % dev.iv_name)
7500

    
7501
    # First get the migration information from the remote node
7502
    result = self.rpc.call_migration_info(source_node, instance)
7503
    msg = result.fail_msg
7504
    if msg:
7505
      log_err = ("Failed fetching source migration information from %s: %s" %
7506
                 (source_node, msg))
7507
      logging.error(log_err)
7508
      raise errors.OpExecError(log_err)
7509

    
7510
    self.migration_info = migration_info = result.payload
7511

    
7512
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7513
      # Then switch the disks to master/master mode
7514
      self._EnsureSecondary(target_node)
7515
      self._GoStandalone()
7516
      self._GoReconnect(True)
7517
      self._WaitUntilSync()
7518

    
7519
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7520
    result = self.rpc.call_accept_instance(target_node,
7521
                                           instance,
7522
                                           migration_info,
7523
                                           self.nodes_ip[target_node])
7524

    
7525
    msg = result.fail_msg
7526
    if msg:
7527
      logging.error("Instance pre-migration failed, trying to revert"
7528
                    " disk status: %s", msg)
7529
      self.feedback_fn("Pre-migration failed, aborting")
7530
      self._AbortMigration()
7531
      self._RevertDiskStatus()
7532
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7533
                               (instance.name, msg))
7534

    
7535
    self.feedback_fn("* migrating instance to %s" % target_node)
7536
    result = self.rpc.call_instance_migrate(source_node, instance,
7537
                                            self.nodes_ip[target_node],
7538
                                            self.live)
7539
    msg = result.fail_msg
7540
    if msg:
7541
      logging.error("Instance migration failed, trying to revert"
7542
                    " disk status: %s", msg)
7543
      self.feedback_fn("Migration failed, aborting")
7544
      self._AbortMigration()
7545
      self._RevertDiskStatus()
7546
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7547
                               (instance.name, msg))
7548

    
7549
    self.feedback_fn("* starting memory transfer")
7550
    last_feedback = time.time()
7551
    while True:
7552
      result = self.rpc.call_instance_get_migration_status(source_node,
7553
                                                           instance)
7554
      msg = result.fail_msg
7555
      ms = result.payload   # MigrationStatus instance
7556
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7557
        logging.error("Instance migration failed, trying to revert"
7558
                      " disk status: %s", msg)
7559
        self.feedback_fn("Migration failed, aborting")
7560
        self._AbortMigration()
7561
        self._RevertDiskStatus()
7562
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7563
                                 (instance.name, msg))
7564

    
7565
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7566
        self.feedback_fn("* memory transfer complete")
7567
        break
7568

    
7569
      if (utils.TimeoutExpired(last_feedback,
7570
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7571
          ms.transferred_ram is not None):
7572
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7573
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7574
        last_feedback = time.time()
7575

    
7576
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7577

    
7578
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7579
                                                           instance,
7580
                                                           True,
7581
                                                           self.live)
7582
    msg = result.fail_msg
7583
    if msg:
7584
      logging.error("Instance migration succeeded, but finalization failed"
7585
                    " on the source node: %s", msg)
7586
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7587
                               msg)
7588

    
7589
    instance.primary_node = target_node
7590

    
7591
    # distribute new instance config to the other nodes
7592
    self.cfg.Update(instance, self.feedback_fn)
7593

    
7594
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7595
                                                           instance,
7596
                                                           migration_info,
7597
                                                           True)
7598
    msg = result.fail_msg
7599
    if msg:
7600
      logging.error("Instance migration succeeded, but finalization failed"
7601
                    " on the target node: %s", msg)
7602
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7603
                               msg)
7604

    
7605
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7606
      self._EnsureSecondary(source_node)
7607
      self._WaitUntilSync()
7608
      self._GoStandalone()
7609
      self._GoReconnect(False)
7610
      self._WaitUntilSync()
7611

    
7612
    self.feedback_fn("* done")
7613

    
7614
  def _ExecFailover(self):
7615
    """Failover an instance.
7616

7617
    The failover is done by shutting it down on its present node and
7618
    starting it on the secondary.
7619

7620
    """
7621
    instance = self.instance
7622
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7623

    
7624
    source_node = instance.primary_node
7625
    target_node = self.target_node
7626

    
7627
    if instance.admin_up:
7628
      self.feedback_fn("* checking disk consistency between source and target")
7629
      for dev in instance.disks:
7630
        # for drbd, these are drbd over lvm
7631
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7632
          if primary_node.offline:
7633
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7634
                             " target node %s" %
7635
                             (primary_node.name, dev.iv_name, target_node))
7636
          elif not self.ignore_consistency:
7637
            raise errors.OpExecError("Disk %s is degraded on target node,"
7638
                                     " aborting failover" % dev.iv_name)
7639
    else:
7640
      self.feedback_fn("* not checking disk consistency as instance is not"
7641
                       " running")
7642

    
7643
    self.feedback_fn("* shutting down instance on source node")
7644
    logging.info("Shutting down instance %s on node %s",
7645
                 instance.name, source_node)
7646

    
7647
    result = self.rpc.call_instance_shutdown(source_node, instance,
7648
                                             self.shutdown_timeout)
7649
    msg = result.fail_msg
7650
    if msg:
7651
      if self.ignore_consistency or primary_node.offline:
7652
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7653
                           " proceeding anyway; please make sure node"
7654
                           " %s is down; error details: %s",
7655
                           instance.name, source_node, source_node, msg)
7656
      else:
7657
        raise errors.OpExecError("Could not shutdown instance %s on"
7658
                                 " node %s: %s" %
7659
                                 (instance.name, source_node, msg))
7660

    
7661
    self.feedback_fn("* deactivating the instance's disks on source node")
7662
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7663
      raise errors.OpExecError("Can't shut down the instance's disks")
7664

    
7665
    instance.primary_node = target_node
7666
    # distribute new instance config to the other nodes
7667
    self.cfg.Update(instance, self.feedback_fn)
7668

    
7669
    # Only start the instance if it's marked as up
7670
    if instance.admin_up:
7671
      self.feedback_fn("* activating the instance's disks on target node %s" %
7672
                       target_node)
7673
      logging.info("Starting instance %s on node %s",
7674
                   instance.name, target_node)
7675

    
7676
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7677
                                           ignore_secondaries=True)
7678
      if not disks_ok:
7679
        _ShutdownInstanceDisks(self.lu, instance)
7680
        raise errors.OpExecError("Can't activate the instance's disks")
7681

    
7682
      self.feedback_fn("* starting the instance on the target node %s" %
7683
                       target_node)
7684
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7685
                                            False)
7686
      msg = result.fail_msg
7687
      if msg:
7688
        _ShutdownInstanceDisks(self.lu, instance)
7689
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7690
                                 (instance.name, target_node, msg))
7691

    
7692
  def Exec(self, feedback_fn):
7693
    """Perform the migration.
7694

7695
    """
7696
    self.feedback_fn = feedback_fn
7697
    self.source_node = self.instance.primary_node
7698

    
7699
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7700
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7701
      self.target_node = self.instance.secondary_nodes[0]
7702
      # Otherwise self.target_node has been populated either
7703
      # directly, or through an iallocator.
7704

    
7705
    self.all_nodes = [self.source_node, self.target_node]
7706
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7707
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7708

    
7709
    if self.failover:
7710
      feedback_fn("Failover instance %s" % self.instance.name)
7711
      self._ExecFailover()
7712
    else:
7713
      feedback_fn("Migrating instance %s" % self.instance.name)
7714

    
7715
      if self.cleanup:
7716
        return self._ExecCleanup()
7717
      else:
7718
        return self._ExecMigration()
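

# Illustrative sketch, not part of the original module: instance-level
# logical units (e.g. the migrate and failover LUs) are expected to drive
# TLMigrateInstance by instantiating it during ExpandNames and storing it in
# self.tasklets; the helper name and keyword choices below are assumptions
# for illustration only.
def _ExampleAttachMigrateTasklet(lu, instance_name, do_failover):
  """Attach a TLMigrateInstance tasklet to an already-initialized LU.

  """
  lu.tasklets = [TLMigrateInstance(lu, instance_name,
                                   cleanup=False,
                                   failover=do_failover)]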
7719

    
7720

    
7721
def _CreateBlockDev(lu, node, instance, device, force_create,
7722
                    info, force_open):
7723
  """Create a tree of block devices on a given node.
7724

7725
  If this device type has to be created on secondaries, create it and
7726
  all its children.
7727

7728
  If not, just recurse to children keeping the same 'force' value.
7729

7730
  @param lu: the lu on whose behalf we execute
7731
  @param node: the node on which to create the device
7732
  @type instance: L{objects.Instance}
7733
  @param instance: the instance which owns the device
7734
  @type device: L{objects.Disk}
7735
  @param device: the device to create
7736
  @type force_create: boolean
7737
  @param force_create: whether to force creation of this device; this
7738
      will be changed to True whenever we find a device which has
7739
      CreateOnSecondary() attribute
7740
  @param info: the extra 'metadata' we should attach to the device
7741
      (this will be represented as a LVM tag)
7742
  @type force_open: boolean
7743
  @param force_open: this parameter will be passed to the
7744
      L{backend.BlockdevCreate} function where it specifies
7745
      whether we run on primary or not, and it affects both
7746
      the child assembly and the device's own Open() execution
7747

7748
  """
7749
  if device.CreateOnSecondary():
7750
    force_create = True
7751

    
7752
  if device.children:
7753
    for child in device.children:
7754
      _CreateBlockDev(lu, node, instance, child, force_create,
7755
                      info, force_open)
7756

    
7757
  if not force_create:
7758
    return
7759

    
7760
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7761

    
7762

    
7763
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7764
  """Create a single block device on a given node.
7765

7766
  This will not recurse over children of the device, so they must be
7767
  created in advance.
7768

7769
  @param lu: the lu on whose behalf we execute
7770
  @param node: the node on which to create the device
7771
  @type instance: L{objects.Instance}
7772
  @param instance: the instance which owns the device
7773
  @type device: L{objects.Disk}
7774
  @param device: the device to create
7775
  @param info: the extra 'metadata' we should attach to the device
7776
      (this will be represented as a LVM tag)
7777
  @type force_open: boolean
7778
  @param force_open: this parameter will be passed to the
7779
      L{backend.BlockdevCreate} function where it specifies
7780
      whether we run on primary or not, and it affects both
7781
      the child assembly and the device's own Open() execution
7782

7783
  """
7784
  lu.cfg.SetDiskID(device, node)
7785
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7786
                                       instance.name, force_open, info)
7787
  result.Raise("Can't create block device %s on"
7788
               " node %s for instance %s" % (device, node, instance.name))
7789
  if device.physical_id is None:
7790
    device.physical_id = result.payload
7791

    
7792

    
7793
def _GenerateUniqueNames(lu, exts):
  """Generate a set of suitable LV names.

  This will generate one logical volume name for each of the given
  extensions (suffixes).

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
7804

    
7805

    
7806
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7807
                         iv_name, p_minor, s_minor):
7808
  """Generate a drbd8 device complete with its children.
7809

7810
  """
7811
  assert len(vgnames) == len(names) == 2
7812
  port = lu.cfg.AllocatePort()
7813
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7814
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7815
                          logical_id=(vgnames[0], names[0]))
7816
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7817
                          logical_id=(vgnames[1], names[1]))
7818
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7819
                          logical_id=(primary, secondary, port,
7820
                                      p_minor, s_minor,
7821
                                      shared_secret),
7822
                          children=[dev_data, dev_meta],
7823
                          iv_name=iv_name)
7824
  return drbd_dev
7825

    
7826

    
7827
def _GenerateDiskTemplate(lu, template_name,
7828
                          instance_name, primary_node,
7829
                          secondary_nodes, disk_info,
7830
                          file_storage_dir, file_driver,
7831
                          base_index, feedback_fn):
7832
  """Generate the entire disk layout for a given template type.
7833

7834
  """
7835
  #TODO: compute space requirements
7836

    
7837
  vgname = lu.cfg.GetVGName()
7838
  disk_count = len(disk_info)
7839
  disks = []
7840
  if template_name == constants.DT_DISKLESS:
7841
    pass
7842
  elif template_name == constants.DT_PLAIN:
7843
    if len(secondary_nodes) != 0:
7844
      raise errors.ProgrammerError("Wrong template configuration")
7845

    
7846
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7847
                                      for i in range(disk_count)])
7848
    for idx, disk in enumerate(disk_info):
7849
      disk_index = idx + base_index
7850
      vg = disk.get(constants.IDISK_VG, vgname)
7851
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7852
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7853
                              size=disk[constants.IDISK_SIZE],
7854
                              logical_id=(vg, names[idx]),
7855
                              iv_name="disk/%d" % disk_index,
7856
                              mode=disk[constants.IDISK_MODE])
7857
      disks.append(disk_dev)
7858
  elif template_name == constants.DT_DRBD8:
7859
    if len(secondary_nodes) != 1:
7860
      raise errors.ProgrammerError("Wrong template configuration")
7861
    remote_node = secondary_nodes[0]
7862
    minors = lu.cfg.AllocateDRBDMinor(
7863
      [primary_node, remote_node] * len(disk_info), instance_name)
7864

    
7865
    names = []
7866
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7867
                                               for i in range(disk_count)]):
7868
      names.append(lv_prefix + "_data")
7869
      names.append(lv_prefix + "_meta")
7870
    for idx, disk in enumerate(disk_info):
7871
      disk_index = idx + base_index
7872
      data_vg = disk.get(constants.IDISK_VG, vgname)
7873
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7874
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7875
                                      disk[constants.IDISK_SIZE],
7876
                                      [data_vg, meta_vg],
7877
                                      names[idx * 2:idx * 2 + 2],
7878
                                      "disk/%d" % disk_index,
7879
                                      minors[idx * 2], minors[idx * 2 + 1])
7880
      disk_dev.mode = disk[constants.IDISK_MODE]
7881
      disks.append(disk_dev)
7882
  elif template_name == constants.DT_FILE:
7883
    if len(secondary_nodes) != 0:
7884
      raise errors.ProgrammerError("Wrong template configuration")
7885

    
7886
    opcodes.RequireFileStorage()
7887

    
7888
    for idx, disk in enumerate(disk_info):
7889
      disk_index = idx + base_index
7890
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7891
                              size=disk[constants.IDISK_SIZE],
7892
                              iv_name="disk/%d" % disk_index,
7893
                              logical_id=(file_driver,
7894
                                          "%s/disk%d" % (file_storage_dir,
7895
                                                         disk_index)),
7896
                              mode=disk[constants.IDISK_MODE])
7897
      disks.append(disk_dev)
7898
  elif template_name == constants.DT_SHARED_FILE:
7899
    if len(secondary_nodes) != 0:
7900
      raise errors.ProgrammerError("Wrong template configuration")
7901

    
7902
    opcodes.RequireSharedFileStorage()
7903

    
7904
    for idx, disk in enumerate(disk_info):
7905
      disk_index = idx + base_index
7906
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7907
                              size=disk[constants.IDISK_SIZE],
7908
                              iv_name="disk/%d" % disk_index,
7909
                              logical_id=(file_driver,
7910
                                          "%s/disk%d" % (file_storage_dir,
7911
                                                         disk_index)),
7912
                              mode=disk[constants.IDISK_MODE])
7913
      disks.append(disk_dev)
7914
  elif template_name == constants.DT_BLOCK:
7915
    if len(secondary_nodes) != 0:
7916
      raise errors.ProgrammerError("Wrong template configuration")
7917

    
7918
    for idx, disk in enumerate(disk_info):
7919
      disk_index = idx + base_index
7920
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7921
                              size=disk[constants.IDISK_SIZE],
7922
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7923
                                          disk[constants.IDISK_ADOPT]),
7924
                              iv_name="disk/%d" % disk_index,
7925
                              mode=disk[constants.IDISK_MODE])
7926
      disks.append(disk_dev)
7927

    
7928
  else:
7929
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7930
  return disks
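

# Illustrative sketch, not part of the original module: in the DRBD8 branch
# above, every ".disk<N>" prefix produced by _GenerateUniqueNames is expanded
# into a (data, meta) LV name pair, e.g. "<uuid>.disk0" becomes
# "<uuid>.disk0_data" and "<uuid>.disk0_meta".  The hypothetical helper below
# mirrors that naming convention.
def _ExamplePairDrbdLvNames(lv_prefixes):
  """Pair up the _data/_meta LV names for a list of DRBD LV prefixes.

  """
  return [(prefix + "_data", prefix + "_meta") for prefix in lv_prefixes]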
7931

    
7932

    
7933
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
7938

    
7939

    
7940
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
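

# Illustrative sketch, not part of the original module: a worked example of
# the arithmetic in _CalcEta.  If 200 MiB out of 1000 MiB were written in 50
# seconds, the average is 0.25 seconds per MiB, so the remaining 800 MiB
# should take about 200 seconds.
def _ExampleCalcEta():
  """Return the ETA for the example above (expected: 200.0 seconds).

  """
  return _CalcEta(50.0, 200, 1000)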
7951

    
7952

    
7953
def _WipeDisks(lu, instance):
7954
  """Wipes instance disks.
7955

7956
  @type lu: L{LogicalUnit}
7957
  @param lu: the logical unit on whose behalf we execute
7958
  @type instance: L{objects.Instance}
7959
  @param instance: the instance whose disks we should create
7960
  @return: the success of the wipe
7961

7962
  """
7963
  node = instance.primary_node
7964

    
7965
  for device in instance.disks:
7966
    lu.cfg.SetDiskID(device, node)
7967

    
7968
  logging.info("Pause sync of instance %s disks", instance.name)
7969
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7970

    
7971
  for idx, success in enumerate(result.payload):
7972
    if not success:
7973
      logging.warn("pause-sync of instance %s for disks %d failed",
7974
                   instance.name, idx)
7975

    
7976
  try:
7977
    for idx, device in enumerate(instance.disks):
7978
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7979
      # MAX_WIPE_CHUNK at max
7980
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7981
                            constants.MIN_WIPE_CHUNK_PERCENT)
7982
      # we _must_ make this an int, otherwise rounding errors will
7983
      # occur
7984
      wipe_chunk_size = int(wipe_chunk_size)
7985

    
7986
      lu.LogInfo("* Wiping disk %d", idx)
7987
      logging.info("Wiping disk %d for instance %s, node %s using"
7988
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7989

    
7990
      offset = 0
7991
      size = device.size
7992
      last_output = 0
7993
      start_time = time.time()
7994

    
7995
      while offset < size:
7996
        wipe_size = min(wipe_chunk_size, size - offset)
7997
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7998
                      idx, offset, wipe_size)
7999
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8000
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8001
                     (idx, offset, wipe_size))
8002
        now = time.time()
8003
        offset += wipe_size
8004
        if now - last_output >= 60:
8005
          eta = _CalcEta(now - start_time, offset, size)
8006
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8007
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8008
          last_output = now
8009
  finally:
8010
    logging.info("Resume sync of instance %s disks", instance.name)
8011

    
8012
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8013

    
8014
    for idx, success in enumerate(result.payload):
8015
      if not success:
8016
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8017
                      " look at the status and troubleshoot the issue", idx)
8018
        logging.warn("resume-sync of instance %s for disks %d failed",
8019
                     instance.name, idx)
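

# Illustrative sketch, not part of the original module: the chunk size used
# by _WipeDisks is MIN_WIPE_CHUNK_PERCENT percent of the disk size, capped at
# MAX_WIPE_CHUNK.  Assuming the usual values of 10 percent and 1024 MiB, a
# 4096 MiB disk is wiped in roughly 410 MiB chunks, while a 100 GiB disk is
# capped at 1024 MiB per RPC call.
def _ExampleWipeChunkSize(disk_size):
  """Compute the wipe chunk size for a disk of C{disk_size} MiB.

  """
  return int(min(constants.MAX_WIPE_CHUNK,
                 disk_size / 100.0 * constants.MIN_WIPE_CHUNK_PERCENT))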
8020

    
8021

    
8022
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8023
  """Create all disks for an instance.
8024

8025
  This abstracts away some work from AddInstance.
8026

8027
  @type lu: L{LogicalUnit}
8028
  @param lu: the logical unit on whose behalf we execute
8029
  @type instance: L{objects.Instance}
8030
  @param instance: the instance whose disks we should create
8031
  @type to_skip: list
8032
  @param to_skip: list of indices to skip
8033
  @type target_node: string
8034
  @param target_node: if passed, overrides the target node for creation
8035
  @rtype: boolean
8036
  @return: the success of the creation
8037

8038
  """
8039
  info = _GetInstanceInfoText(instance)
8040
  if target_node is None:
8041
    pnode = instance.primary_node
8042
    all_nodes = instance.all_nodes
8043
  else:
8044
    pnode = target_node
8045
    all_nodes = [pnode]
8046

    
8047
  if instance.disk_template in constants.DTS_FILEBASED:
8048
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8049
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8050

    
8051
    result.Raise("Failed to create directory '%s' on"
8052
                 " node %s" % (file_storage_dir, pnode))
8053

    
8054
  # Note: this needs to be kept in sync with adding of disks in
8055
  # LUInstanceSetParams
8056
  for idx, device in enumerate(instance.disks):
8057
    if to_skip and idx in to_skip:
8058
      continue
8059
    logging.info("Creating volume %s for instance %s",
8060
                 device.iv_name, instance.name)
8061
    #HARDCODE
8062
    for node in all_nodes:
8063
      f_create = node == pnode
8064
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
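

# Illustrative sketch, not part of the original module: in the loop above the
# device is forced into existence (and opened) only on the primary node;
# secondary nodes get force_create=False and rely on CreateOnSecondary().
# The hypothetical helper below computes those two flags for a given node.
def _ExampleCreateFlags(node, pnode):
  """Return (force_create, force_open) for C{node} given primary C{pnode}.

  """
  f_create = node == pnode
  return (f_create, f_create)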
8065

    
8066

    
8067
def _RemoveDisks(lu, instance, target_node=None):
8068
  """Remove all disks for an instance.
8069

8070
  This abstracts away some work from `AddInstance()` and
8071
  `RemoveInstance()`. Note that in case some of the devices couldn't
8072
  be removed, the removal will continue with the other ones (compare
8073
  with `_CreateDisks()`).
8074

8075
  @type lu: L{LogicalUnit}
8076
  @param lu: the logical unit on whose behalf we execute
8077
  @type instance: L{objects.Instance}
8078
  @param instance: the instance whose disks we should remove
8079
  @type target_node: string
8080
  @param target_node: used to override the node on which to remove the disks
8081
  @rtype: boolean
8082
  @return: the success of the removal
8083

8084
  """
8085
  logging.info("Removing block devices for instance %s", instance.name)
8086

    
8087
  all_result = True
8088
  for device in instance.disks:
8089
    if target_node:
8090
      edata = [(target_node, device)]
8091
    else:
8092
      edata = device.ComputeNodeTree(instance.primary_node)
8093
    for node, disk in edata:
8094
      lu.cfg.SetDiskID(disk, node)
8095
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8096
      if msg:
8097
        lu.LogWarning("Could not remove block device %s on node %s,"
8098
                      " continuing anyway: %s", device.iv_name, node, msg)
8099
        all_result = False
8100

    
8101
  if instance.disk_template == constants.DT_FILE:
8102
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8103
    if target_node:
8104
      tgt = target_node
8105
    else:
8106
      tgt = instance.primary_node
8107
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8108
    if result.fail_msg:
8109
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8110
                    file_storage_dir, tgt, result.fail_msg)
8111
      all_result = False
8112

    
8113
  return all_result
8114

    
8115

    
8116
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
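

# Illustrative sketch, not part of the original module: for a DRBD8 instance
# with a 1024 MiB disk on VG "xenvg" and a 2048 MiB disk on VG "fastvg",
# _ComputeDiskSizePerVG returns {"xenvg": 1152, "fastvg": 2176}, i.e. each
# disk plus 128 MiB of DRBD metadata, grouped by volume group.
def _ExampleDiskSizePerVG():
  """Return the per-VG size requirements for the example above.

  """
  disks = [
    {constants.IDISK_SIZE: 1024, constants.IDISK_VG: "xenvg"},
    {constants.IDISK_SIZE: 2048, constants.IDISK_VG: "fastvg"},
    ]
  return _ComputeDiskSizePerVG(constants.DT_DRBD8, disks)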
8146

    
8147

    
8148
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size requirements, summed over all disks.

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
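

# Illustrative sketch, not part of the original module: with the same two
# disks as in the per-VG example (1024 MiB and 2048 MiB), _ComputeDiskSize
# yields 3072 for DT_PLAIN and 3328 for DT_DRBD8 (128 MiB of metadata per
# disk), while file-based templates report no volume group requirement.
def _ExampleComputeDiskSize():
  """Return (plain_size, drbd_size) for the example above.

  """
  disks = [{constants.IDISK_SIZE: 1024}, {constants.IDISK_SIZE: 2048}]
  return (_ComputeDiskSize(constants.DT_PLAIN, disks),
          _ComputeDiskSize(constants.DT_DRBD8, disks))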
8168

    
8169

    
8170
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]
8183

    
8184

    
8185
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8186
  """Hypervisor parameter validation.
8187

8188
  This function abstract the hypervisor parameter validation to be
8189
  used in both instance create and instance modify.
8190

8191
  @type lu: L{LogicalUnit}
8192
  @param lu: the logical unit for which we check
8193
  @type nodenames: list
8194
  @param nodenames: the list of nodes on which we should check
8195
  @type hvname: string
8196
  @param hvname: the name of the hypervisor we should use
8197
  @type hvparams: dict
8198
  @param hvparams: the parameters which we need to check
8199
  @raise errors.OpPrereqError: if the parameters are not valid
8200

8201
  """
8202
  nodenames = _FilterVmNodes(lu, nodenames)
8203
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8204
                                                  hvname,
8205
                                                  hvparams)
8206
  for node in nodenames:
8207
    info = hvinfo[node]
8208
    if info.offline:
8209
      continue
8210
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8211

    
8212

    
8213
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8214
  """OS parameters validation.
8215

8216
  @type lu: L{LogicalUnit}
8217
  @param lu: the logical unit for which we check
8218
  @type required: boolean
8219
  @param required: whether the validation should fail if the OS is not
8220
      found
8221
  @type nodenames: list
8222
  @param nodenames: the list of nodes on which we should check
8223
  @type osname: string
8224
  @param osname: the name of the OS we should use
8225
  @type osparams: dict
8226
  @param osparams: the parameters which we need to check
8227
  @raise errors.OpPrereqError: if the parameters are not valid
8228

8229
  """
8230
  nodenames = _FilterVmNodes(lu, nodenames)
8231
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8232
                                   [constants.OS_VALIDATE_PARAMETERS],
8233
                                   osparams)
8234
  for node, nres in result.items():
8235
    # we don't check for offline cases since this should be run only
8236
    # against the master node and/or an instance's nodes
8237
    nres.Raise("OS Parameters validation failed on node %s" % node)
8238
    if not nres.payload:
8239
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8240
                 osname, node)
8241

    
8242

    
8243
class LUInstanceCreate(LogicalUnit):
8244
  """Create an instance.
8245

8246
  """
8247
  HPATH = "instance-add"
8248
  HTYPE = constants.HTYPE_INSTANCE
8249
  REQ_BGL = False
8250

    
8251
  def CheckArguments(self):
8252
    """Check arguments.
8253

8254
    """
8255
    # do not require name_check to ease forward/backward compatibility
8256
    # for tools
8257
    if self.op.no_install and self.op.start:
8258
      self.LogInfo("No-installation mode selected, disabling startup")
8259
      self.op.start = False
8260
    # validate/normalize the instance name
8261
    self.op.instance_name = \
8262
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8263

    
8264
    if self.op.ip_check and not self.op.name_check:
8265
      # TODO: make the ip check more flexible and not depend on the name check
8266
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8267
                                 " check", errors.ECODE_INVAL)
8268

    
8269
    # check nics' parameter names
8270
    for nic in self.op.nics:
8271
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8272

    
8273
    # check disks. parameter names and consistent adopt/no-adopt strategy
8274
    has_adopt = has_no_adopt = False
8275
    for disk in self.op.disks:
8276
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8277
      if constants.IDISK_ADOPT in disk:
8278
        has_adopt = True
8279
      else:
8280
        has_no_adopt = True
8281
    if has_adopt and has_no_adopt:
8282
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8283
                                 errors.ECODE_INVAL)
8284
    if has_adopt:
8285
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8286
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8287
                                   " '%s' disk template" %
8288
                                   self.op.disk_template,
8289
                                   errors.ECODE_INVAL)
8290
      if self.op.iallocator is not None:
8291
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8292
                                   " iallocator script", errors.ECODE_INVAL)
8293
      if self.op.mode == constants.INSTANCE_IMPORT:
8294
        raise errors.OpPrereqError("Disk adoption not allowed for"
8295
                                   " instance import", errors.ECODE_INVAL)
8296
    else:
8297
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8298
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8299
                                   " but no 'adopt' parameter given" %
8300
                                   self.op.disk_template,
8301
                                   errors.ECODE_INVAL)
8302

    
8303
    self.adopt_disks = has_adopt
8304

    
8305
    # instance name verification
8306
    if self.op.name_check:
8307
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8308
      self.op.instance_name = self.hostname1.name
8309
      # used in CheckPrereq for ip ping check
8310
      self.check_ip = self.hostname1.ip
8311
    else:
8312
      self.check_ip = None
8313

    
8314
    # file storage checks
8315
    if (self.op.file_driver and
8316
        not self.op.file_driver in constants.FILE_DRIVER):
8317
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8318
                                 self.op.file_driver, errors.ECODE_INVAL)
8319

    
8320
    if self.op.disk_template == constants.DT_FILE:
8321
      opcodes.RequireFileStorage()
8322
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8323
      opcodes.RequireSharedFileStorage()
8324

    
8325
    ### Node/iallocator related checks
8326
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8327

    
8328
    if self.op.pnode is not None:
8329
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8330
        if self.op.snode is None:
8331
          raise errors.OpPrereqError("The networked disk templates need"
8332
                                     " a mirror node", errors.ECODE_INVAL)
8333
      elif self.op.snode:
8334
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8335
                        " template")
8336
        self.op.snode = None
8337

    
8338
    self._cds = _GetClusterDomainSecret()
8339

    
8340
    if self.op.mode == constants.INSTANCE_IMPORT:
8341
      # On import force_variant must be True, because if we forced it at
8342
      # initial install, our only chance when importing it back is that it
8343
      # works again!
8344
      self.op.force_variant = True
8345

    
8346
      if self.op.no_install:
8347
        self.LogInfo("No-installation mode has no effect during import")
8348

    
8349
    elif self.op.mode == constants.INSTANCE_CREATE:
8350
      if self.op.os_type is None:
8351
        raise errors.OpPrereqError("No guest OS specified",
8352
                                   errors.ECODE_INVAL)
8353
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8354
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8355
                                   " installation" % self.op.os_type,
8356
                                   errors.ECODE_STATE)
8357
      if self.op.disk_template is None:
8358
        raise errors.OpPrereqError("No disk template specified",
8359
                                   errors.ECODE_INVAL)
8360

    
8361
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8362
      # Check handshake to ensure both clusters have the same domain secret
8363
      src_handshake = self.op.source_handshake
8364
      if not src_handshake:
8365
        raise errors.OpPrereqError("Missing source handshake",
8366
                                   errors.ECODE_INVAL)
8367

    
8368
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8369
                                                           src_handshake)
8370
      if errmsg:
8371
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8372
                                   errors.ECODE_INVAL)
8373

    
8374
      # Load and check source CA
8375
      self.source_x509_ca_pem = self.op.source_x509_ca
8376
      if not self.source_x509_ca_pem:
8377
        raise errors.OpPrereqError("Missing source X509 CA",
8378
                                   errors.ECODE_INVAL)
8379

    
8380
      try:
8381
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8382
                                                    self._cds)
8383
      except OpenSSL.crypto.Error, err:
8384
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8385
                                   (err, ), errors.ECODE_INVAL)
8386

    
8387
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8388
      if errcode is not None:
8389
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8390
                                   errors.ECODE_INVAL)
8391

    
8392
      self.source_x509_ca = cert
8393

    
8394
      src_instance_name = self.op.source_instance_name
8395
      if not src_instance_name:
8396
        raise errors.OpPrereqError("Missing source instance name",
8397
                                   errors.ECODE_INVAL)
8398

    
8399
      self.source_instance_name = \
8400
          netutils.GetHostname(name=src_instance_name).name
8401

    
8402
    else:
8403
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8404
                                 self.op.mode, errors.ECODE_INVAL)
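  # Illustrative sketch (not part of the LU): the adopt/no-adopt rule enforced
  # above means the disk list must be uniform.  Volume group/LV names and the
  # size are made-up examples.
  #
  #   ok  = [{constants.IDISK_ADOPT: "xenvg/data1"},
  #          {constants.IDISK_ADOPT: "xenvg/data2"}]    # all adopted
  #   ok  = [{constants.IDISK_SIZE: 10240}]             # none adopted
  #   bad = [{constants.IDISK_ADOPT: "xenvg/data1"},
  #          {constants.IDISK_SIZE: 10240}]             # mixed, rejected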
  def ExpandNames(self):
8407
    """ExpandNames for CreateInstance.
8408

8409
    Figure out the right locks for instance creation.
8410

8411
    """
8412
    self.needed_locks = {}
8413

    
8414
    instance_name = self.op.instance_name
8415
    # this is just a preventive check, but someone might still add this
8416
    # instance in the meantime, and creation will fail at lock-add time
8417
    if instance_name in self.cfg.GetInstanceList():
8418
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8419
                                 instance_name, errors.ECODE_EXISTS)
8420

    
8421
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8422

    
8423
    if self.op.iallocator:
8424
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8425
    else:
8426
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8427
      nodelist = [self.op.pnode]
8428
      if self.op.snode is not None:
8429
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8430
        nodelist.append(self.op.snode)
8431
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8432

    
8433
    # in case of import lock the source node too
8434
    if self.op.mode == constants.INSTANCE_IMPORT:
8435
      src_node = self.op.src_node
8436
      src_path = self.op.src_path
8437

    
8438
      if src_path is None:
8439
        self.op.src_path = src_path = self.op.instance_name
8440

    
8441
      if src_node is None:
8442
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8443
        self.op.src_node = None
8444
        if os.path.isabs(src_path):
8445
          raise errors.OpPrereqError("Importing an instance from a path"
8446
                                     " requires a source node option",
8447
                                     errors.ECODE_INVAL)
8448
      else:
8449
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8450
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8451
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8452
        if not os.path.isabs(src_path):
8453
          self.op.src_path = src_path = \
8454
            utils.PathJoin(constants.EXPORT_DIR, src_path)
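  # Rough sketch of the node lock requests computed above (node names are made
  # up): with an iallocator, or an import without an explicit source node, the
  # whole node level is locked; otherwise only the named nodes are.
  #
  #   self.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET
  #   self.needed_locks[locking.LEVEL_NODE] == ["node1", "node2", "node3"]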
  def _RunAllocator(self):
8457
    """Run the allocator based on input opcode.
8458

8459
    """
8460
    nics = [n.ToDict() for n in self.nics]
8461
    ial = IAllocator(self.cfg, self.rpc,
8462
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8463
                     name=self.op.instance_name,
8464
                     disk_template=self.op.disk_template,
8465
                     tags=self.op.tags,
8466
                     os=self.op.os_type,
8467
                     vcpus=self.be_full[constants.BE_VCPUS],
8468
                     memory=self.be_full[constants.BE_MEMORY],
8469
                     disks=self.disks,
8470
                     nics=nics,
8471
                     hypervisor=self.op.hypervisor,
8472
                     )
8473

    
8474
    ial.Run(self.op.iallocator)
8475

    
8476
    if not ial.success:
8477
      raise errors.OpPrereqError("Can't compute nodes using"
8478
                                 " iallocator '%s': %s" %
8479
                                 (self.op.iallocator, ial.info),
8480
                                 errors.ECODE_NORES)
8481
    if len(ial.result) != ial.required_nodes:
8482
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8483
                                 " of nodes (%s), required %s" %
8484
                                 (self.op.iallocator, len(ial.result),
8485
                                  ial.required_nodes), errors.ECODE_FAULT)
8486
    self.op.pnode = ial.result[0]
8487
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8488
                 self.op.instance_name, self.op.iallocator,
8489
                 utils.CommaJoin(ial.result))
8490
    if ial.required_nodes == 2:
8491
      self.op.snode = ial.result[1]
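  # Sketch of the allocator hand-off above (hypothetical node names): the
  # first returned node becomes the primary and, when required_nodes == 2,
  # the second one the secondary.
  #
  #   ial.result == ["node3.example.com", "node7.example.com"]
  #   self.op.pnode == "node3.example.com"
  #   self.op.snode == "node7.example.com"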
  def BuildHooksEnv(self):
8494
    """Build hooks env.
8495

8496
    This runs on master, primary and secondary nodes of the instance.
8497

8498
    """
8499
    env = {
8500
      "ADD_MODE": self.op.mode,
8501
      }
8502
    if self.op.mode == constants.INSTANCE_IMPORT:
8503
      env["SRC_NODE"] = self.op.src_node
8504
      env["SRC_PATH"] = self.op.src_path
8505
      env["SRC_IMAGES"] = self.src_images
8506

    
8507
    env.update(_BuildInstanceHookEnv(
8508
      name=self.op.instance_name,
8509
      primary_node=self.op.pnode,
8510
      secondary_nodes=self.secondaries,
8511
      status=self.op.start,
8512
      os_type=self.op.os_type,
8513
      memory=self.be_full[constants.BE_MEMORY],
8514
      vcpus=self.be_full[constants.BE_VCPUS],
8515
      nics=_NICListToTuple(self, self.nics),
8516
      disk_template=self.op.disk_template,
8517
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8518
             for d in self.disks],
8519
      bep=self.be_full,
8520
      hvp=self.hv_full,
8521
      hypervisor_name=self.op.hypervisor,
8522
      tags=self.op.tags,
8523
    ))
8524

    
8525
    return env
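  # For an import, the dict returned above carries the mode/source keys set
  # here plus the generic INSTANCE_* keys added by _BuildInstanceHookEnv
  # (node name and path below are illustrative only):
  #
  #   {"ADD_MODE": constants.INSTANCE_IMPORT,
  #    "SRC_NODE": "node2.example.com",
  #    "SRC_PATH": "/srv/ganeti/export/web1.example.com",
  #    "SRC_IMAGES": [...],
  #    ...}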
  def BuildHooksNodes(self):
8528
    """Build hooks nodes.
8529

8530
    """
8531
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8532
    return nl, nl
8533

    
8534
  def _ReadExportInfo(self):
8535
    """Reads the export information from disk.
8536

8537
    It will override the opcode source node and path with the actual
8538
    information, if these two were not specified before.
8539

8540
    @return: the export information
8541

8542
    """
8543
    assert self.op.mode == constants.INSTANCE_IMPORT
8544

    
8545
    src_node = self.op.src_node
8546
    src_path = self.op.src_path
8547

    
8548
    if src_node is None:
8549
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8550
      exp_list = self.rpc.call_export_list(locked_nodes)
8551
      found = False
8552
      for node in exp_list:
8553
        if exp_list[node].fail_msg:
8554
          continue
8555
        if src_path in exp_list[node].payload:
8556
          found = True
8557
          self.op.src_node = src_node = node
8558
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8559
                                                       src_path)
8560
          break
8561
      if not found:
8562
        raise errors.OpPrereqError("No export found for relative path %s" %
8563
                                    src_path, errors.ECODE_INVAL)
8564

    
8565
    _CheckNodeOnline(self, src_node)
8566
    result = self.rpc.call_export_info(src_node, src_path)
8567
    result.Raise("No export or invalid export found in dir %s" % src_path)
8568

    
8569
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8570
    if not export_info.has_section(constants.INISECT_EXP):
8571
      raise errors.ProgrammerError("Corrupted export config",
8572
                                   errors.ECODE_ENVIRON)
8573

    
8574
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8575
    if (int(ei_version) != constants.EXPORT_VERSION):
8576
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8577
                                 (ei_version, constants.EXPORT_VERSION),
8578
                                 errors.ECODE_ENVIRON)
8579
    return export_info
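  # The export info parsed above is a plain ConfigParser file.  A minimal,
  # made-up example of the section and key checked here (the section header
  # name is assumed; the code only refers to it via constants.INISECT_EXP):
  #
  #   [export]
  #   version = 0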
  def _ReadExportParams(self, einfo):
8582
    """Use export parameters as defaults.
8583

8584
    In case the opcode doesn't specify (as in override) some instance
8585
    parameters, then try to use them from the export information, if
8586
    that declares them.
8587

8588
    """
8589
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8590

    
8591
    if self.op.disk_template is None:
8592
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8593
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8594
                                          "disk_template")
8595
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8596
          raise errors.OpPrereqError("Disk template specified in configuration"
8597
                                     " file is not one of the allowed values:"
8598
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8599
      else:
8600
        raise errors.OpPrereqError("No disk template specified and the export"
8601
                                   " is missing the disk_template information",
8602
                                   errors.ECODE_INVAL)
8603

    
8604
    if not self.op.disks:
8605
      disks = []
8606
      # TODO: import the disk iv_name too
8607
      for idx in range(constants.MAX_DISKS):
8608
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8609
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8610
          disks.append({constants.IDISK_SIZE: disk_sz})
8611
      self.op.disks = disks
8612
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8613
        raise errors.OpPrereqError("No disk info specified and the export"
8614
                                   " is missing the disk information",
8615
                                   errors.ECODE_INVAL)
8616

    
8617
    if not self.op.nics:
8618
      nics = []
8619
      for idx in range(constants.MAX_NICS):
8620
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8621
          ndict = {}
8622
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8623
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8624
            ndict[name] = v
8625
          nics.append(ndict)
8626
        else:
8627
          break
8628
      self.op.nics = nics
8629

    
8630
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8631
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8632

    
8633
    if (self.op.hypervisor is None and
8634
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8635
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8636

    
8637
    if einfo.has_section(constants.INISECT_HYP):
8638
      # use the export parameters but do not override the ones
8639
      # specified by the user
8640
      for name, value in einfo.items(constants.INISECT_HYP):
8641
        if name not in self.op.hvparams:
8642
          self.op.hvparams[name] = value
8643

    
8644
    if einfo.has_section(constants.INISECT_BEP):
8645
      # use the parameters, without overriding
8646
      for name, value in einfo.items(constants.INISECT_BEP):
8647
        if name not in self.op.beparams:
8648
          self.op.beparams[name] = value
8649
    else:
8650
      # try to read the parameters old style, from the main section
8651
      for name in constants.BES_PARAMETERS:
8652
        if (name not in self.op.beparams and
8653
            einfo.has_option(constants.INISECT_INS, name)):
8654
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8655

    
8656
    if einfo.has_section(constants.INISECT_OSP):
8657
      # use the parameters, without overriding
8658
      for name, value in einfo.items(constants.INISECT_OSP):
8659
        if name not in self.op.osparams:
8660
          self.op.osparams[name] = value
8661

    
8662
  def _RevertToDefaults(self, cluster):
8663
    """Revert the instance parameters to the default values.
8664

8665
    """
8666
    # hvparams
8667
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8668
    for name in self.op.hvparams.keys():
8669
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8670
        del self.op.hvparams[name]
8671
    # beparams
8672
    be_defs = cluster.SimpleFillBE({})
8673
    for name in self.op.beparams.keys():
8674
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8675
        del self.op.beparams[name]
8676
    # nic params
8677
    nic_defs = cluster.SimpleFillNIC({})
8678
    for nic in self.op.nics:
8679
      for name in constants.NICS_PARAMETERS:
8680
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8681
          del nic[name]
8682
    # osparams
8683
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8684
    for name in self.op.osparams.keys():
8685
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8686
        del self.op.osparams[name]
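  # Small example of this "identify defaults" pass (made-up values): any
  # parameter equal to the cluster default is dropped from the opcode, so the
  # new instance keeps tracking the cluster-level value.
  #
  #   cluster default beparams : {constants.BE_MEMORY: 512}
  #   self.op.beparams before  : {constants.BE_MEMORY: 512, constants.BE_VCPUS: 4}
  #   self.op.beparams after   : {constants.BE_VCPUS: 4}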
  def _CalculateFileStorageDir(self):
8689
    """Calculate final instance file storage dir.
8690

8691
    """
8692
    # file storage dir calculation/check
8693
    self.instance_file_storage_dir = None
8694
    if self.op.disk_template in constants.DTS_FILEBASED:
8695
      # build the full file storage dir path
8696
      joinargs = []
8697

    
8698
      if self.op.disk_template == constants.DT_SHARED_FILE:
8699
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8700
      else:
8701
        get_fsd_fn = self.cfg.GetFileStorageDir
8702

    
8703
      cfg_storagedir = get_fsd_fn()
8704
      if not cfg_storagedir:
8705
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8706
      joinargs.append(cfg_storagedir)
8707

    
8708
      if self.op.file_storage_dir is not None:
8709
        joinargs.append(self.op.file_storage_dir)
8710

    
8711
      joinargs.append(self.op.instance_name)
8712

    
8713
      # pylint: disable=W0142
8714
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
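  # Example of the path assembled above (directories and names are made up):
  #
  #   cluster file storage dir : /srv/ganeti/file-storage
  #   self.op.file_storage_dir : customers/acme   (optional component)
  #   instance name            : web1.example.com
  #   result: /srv/ganeti/file-storage/customers/acme/web1.example.com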
  def CheckPrereq(self):
8717
    """Check prerequisites.
8718

8719
    """
8720
    self._CalculateFileStorageDir()
8721

    
8722
    if self.op.mode == constants.INSTANCE_IMPORT:
8723
      export_info = self._ReadExportInfo()
8724
      self._ReadExportParams(export_info)
8725

    
8726
    if (not self.cfg.GetVGName() and
8727
        self.op.disk_template not in constants.DTS_NOT_LVM):
8728
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8729
                                 " instances", errors.ECODE_STATE)
8730

    
8731
    if (self.op.hypervisor is None or
8732
        self.op.hypervisor == constants.VALUE_AUTO):
8733
      self.op.hypervisor = self.cfg.GetHypervisorType()
8734

    
8735
    cluster = self.cfg.GetClusterInfo()
8736
    enabled_hvs = cluster.enabled_hypervisors
8737
    if self.op.hypervisor not in enabled_hvs:
8738
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8739
                                 " cluster (%s)" % (self.op.hypervisor,
8740
                                  ",".join(enabled_hvs)),
8741
                                 errors.ECODE_STATE)
8742

    
8743
    # Check tag validity
8744
    for tag in self.op.tags:
8745
      objects.TaggableObject.ValidateTag(tag)
8746

    
8747
    # check hypervisor parameter syntax (locally)
8748
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8749
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8750
                                      self.op.hvparams)
8751
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8752
    hv_type.CheckParameterSyntax(filled_hvp)
8753
    self.hv_full = filled_hvp
8754
    # check that we don't specify global parameters on an instance
8755
    _CheckGlobalHvParams(self.op.hvparams)
8756

    
8757
    # fill and remember the beparams dict
8758
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8759
    for param, value in self.op.beparams.iteritems():
8760
      if value == constants.VALUE_AUTO:
8761
        self.op.beparams[param] = default_beparams[param]
8762
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8763
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8764

    
8765
    # build os parameters
8766
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8767

    
8768
    # now that hvp/bep are in final format, let's reset to defaults,
8769
    # if told to do so
8770
    if self.op.identify_defaults:
8771
      self._RevertToDefaults(cluster)
8772

    
8773
    # NIC buildup
8774
    self.nics = []
8775
    for idx, nic in enumerate(self.op.nics):
8776
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8777
      nic_mode = nic_mode_req
8778
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8779
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8780

    
8781
      # in routed mode, for the first nic, the default ip is 'auto'
8782
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8783
        default_ip_mode = constants.VALUE_AUTO
8784
      else:
8785
        default_ip_mode = constants.VALUE_NONE
8786

    
8787
      # ip validity checks
8788
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8789
      if ip is None or ip.lower() == constants.VALUE_NONE:
8790
        nic_ip = None
8791
      elif ip.lower() == constants.VALUE_AUTO:
8792
        if not self.op.name_check:
8793
          raise errors.OpPrereqError("IP address set to auto but name checks"
8794
                                     " have been skipped",
8795
                                     errors.ECODE_INVAL)
8796
        nic_ip = self.hostname1.ip
8797
      else:
8798
        if not netutils.IPAddress.IsValid(ip):
8799
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8800
                                     errors.ECODE_INVAL)
8801
        nic_ip = ip
8802

    
8803
      # TODO: check the ip address for uniqueness
8804
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8805
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8806
                                   errors.ECODE_INVAL)
8807

    
8808
      # MAC address verification
8809
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8810
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8811
        mac = utils.NormalizeAndValidateMac(mac)
8812

    
8813
        try:
8814
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8815
        except errors.ReservationError:
8816
          raise errors.OpPrereqError("MAC address %s already in use"
8817
                                     " in cluster" % mac,
8818
                                     errors.ECODE_NOTUNIQUE)
8819

    
8820
      #  Build nic parameters
8821
      link = nic.get(constants.INIC_LINK, None)
8822
      if link == constants.VALUE_AUTO:
8823
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8824
      nicparams = {}
8825
      if nic_mode_req:
8826
        nicparams[constants.NIC_MODE] = nic_mode
8827
      if link:
8828
        nicparams[constants.NIC_LINK] = link
8829

    
8830
      check_params = cluster.SimpleFillNIC(nicparams)
8831
      objects.NIC.CheckParameterSyntax(check_params)
8832
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8833

    
8834
    # disk checks/pre-build
8835
    default_vg = self.cfg.GetVGName()
8836
    self.disks = []
8837
    for disk in self.op.disks:
8838
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8839
      if mode not in constants.DISK_ACCESS_SET:
8840
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8841
                                   mode, errors.ECODE_INVAL)
8842
      size = disk.get(constants.IDISK_SIZE, None)
8843
      if size is None:
8844
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8845
      try:
8846
        size = int(size)
8847
      except (TypeError, ValueError):
8848
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8849
                                   errors.ECODE_INVAL)
8850

    
8851
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8852
      new_disk = {
8853
        constants.IDISK_SIZE: size,
8854
        constants.IDISK_MODE: mode,
8855
        constants.IDISK_VG: data_vg,
8856
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8857
        }
8858
      if constants.IDISK_ADOPT in disk:
8859
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8860
      self.disks.append(new_disk)
8861

    
8862
    if self.op.mode == constants.INSTANCE_IMPORT:
8863
      disk_images = []
8864
      for idx in range(len(self.disks)):
8865
        option = "disk%d_dump" % idx
8866
        if export_info.has_option(constants.INISECT_INS, option):
8867
          # FIXME: are the old os-es, disk sizes, etc. useful?
8868
          export_name = export_info.get(constants.INISECT_INS, option)
8869
          image = utils.PathJoin(self.op.src_path, export_name)
8870
          disk_images.append(image)
8871
        else:
8872
          disk_images.append(False)
8873

    
8874
      self.src_images = disk_images
8875

    
8876
      old_name = export_info.get(constants.INISECT_INS, "name")
8877
      if self.op.instance_name == old_name:
8878
        for idx, nic in enumerate(self.nics):
8879
          if nic.mac == constants.VALUE_AUTO:
8880
            nic_mac_ini = "nic%d_mac" % idx
8881
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8882

    
8883
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8884

    
8885
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8886
    if self.op.ip_check:
8887
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8888
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8889
                                   (self.check_ip, self.op.instance_name),
8890
                                   errors.ECODE_NOTUNIQUE)
8891

    
8892
    #### mac address generation
8893
    # By generating here the mac address both the allocator and the hooks get
8894
    # the real final mac address rather than the 'auto' or 'generate' value.
8895
    # There is a race condition between the generation and the instance object
8896
    # creation, which means that we know the mac is valid now, but we're not
8897
    # sure it will be when we actually add the instance. If things go bad
8898
    # adding the instance will abort because of a duplicate mac, and the
8899
    # creation job will fail.
8900
    for nic in self.nics:
8901
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8902
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8903

    
8904
    #### allocator run
8905

    
8906
    if self.op.iallocator is not None:
8907
      self._RunAllocator()
8908

    
8909
    #### node related checks
8910

    
8911
    # check primary node
8912
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8913
    assert self.pnode is not None, \
8914
      "Cannot retrieve locked node %s" % self.op.pnode
8915
    if pnode.offline:
8916
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8917
                                 pnode.name, errors.ECODE_STATE)
8918
    if pnode.drained:
8919
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8920
                                 pnode.name, errors.ECODE_STATE)
8921
    if not pnode.vm_capable:
8922
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8923
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8924

    
8925
    self.secondaries = []
8926

    
8927
    # mirror node verification
8928
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8929
      if self.op.snode == pnode.name:
8930
        raise errors.OpPrereqError("The secondary node cannot be the"
8931
                                   " primary node", errors.ECODE_INVAL)
8932
      _CheckNodeOnline(self, self.op.snode)
8933
      _CheckNodeNotDrained(self, self.op.snode)
8934
      _CheckNodeVmCapable(self, self.op.snode)
8935
      self.secondaries.append(self.op.snode)
8936

    
8937
    nodenames = [pnode.name] + self.secondaries
8938

    
8939
    if not self.adopt_disks:
8940
      # Check lv size requirements, if not adopting
8941
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8942
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8943

    
8944
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8945
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8946
                                disk[constants.IDISK_ADOPT])
8947
                     for disk in self.disks])
8948
      if len(all_lvs) != len(self.disks):
8949
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8950
                                   errors.ECODE_INVAL)
8951
      for lv_name in all_lvs:
8952
        try:
8953
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8954
          # to ReserveLV uses the same syntax
8955
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8956
        except errors.ReservationError:
8957
          raise errors.OpPrereqError("LV named %s used by another instance" %
8958
                                     lv_name, errors.ECODE_NOTUNIQUE)
8959

    
8960
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8961
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8962

    
8963
      node_lvs = self.rpc.call_lv_list([pnode.name],
8964
                                       vg_names.payload.keys())[pnode.name]
8965
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8966
      node_lvs = node_lvs.payload
8967

    
8968
      delta = all_lvs.difference(node_lvs.keys())
8969
      if delta:
8970
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8971
                                   utils.CommaJoin(delta),
8972
                                   errors.ECODE_INVAL)
8973
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8974
      if online_lvs:
8975
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8976
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8977
                                   errors.ECODE_STATE)
8978
      # update the size of disk based on what is found
8979
      for dsk in self.disks:
8980
        dsk[constants.IDISK_SIZE] = \
8981
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8982
                                        dsk[constants.IDISK_ADOPT])][0]))
8983

    
8984
    elif self.op.disk_template == constants.DT_BLOCK:
8985
      # Normalize and de-duplicate device paths
8986
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8987
                       for disk in self.disks])
8988
      if len(all_disks) != len(self.disks):
8989
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8990
                                   errors.ECODE_INVAL)
8991
      baddisks = [d for d in all_disks
8992
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8993
      if baddisks:
8994
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8995
                                   " cannot be adopted" %
8996
                                   (", ".join(baddisks),
8997
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8998
                                   errors.ECODE_INVAL)
8999

    
9000
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9001
                                            list(all_disks))[pnode.name]
9002
      node_disks.Raise("Cannot get block device information from node %s" %
9003
                       pnode.name)
9004
      node_disks = node_disks.payload
9005
      delta = all_disks.difference(node_disks.keys())
9006
      if delta:
9007
        raise errors.OpPrereqError("Missing block device(s): %s" %
9008
                                   utils.CommaJoin(delta),
9009
                                   errors.ECODE_INVAL)
9010
      for dsk in self.disks:
9011
        dsk[constants.IDISK_SIZE] = \
9012
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9013

    
9014
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9015

    
9016
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9017
    # check OS parameters (remotely)
9018
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9019

    
9020
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9021

    
9022
    # memory check on primary node
9023
    if self.op.start:
9024
      _CheckNodeFreeMemory(self, self.pnode.name,
9025
                           "creating instance %s" % self.op.instance_name,
9026
                           self.be_full[constants.BE_MEMORY],
9027
                           self.op.hypervisor)
9028

    
9029
    self.dry_run_result = list(nodenames)
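  # In dry-run mode execution stops after this method and the node list
  # computed above is what the caller gets back, e.g.
  # ["node1.example.com", "node2.example.com"] (hypothetical names).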
  def Exec(self, feedback_fn):
9032
    """Create and add the instance to the cluster.
9033

9034
    """
9035
    instance = self.op.instance_name
9036
    pnode_name = self.pnode.name
9037

    
9038
    ht_kind = self.op.hypervisor
9039
    if ht_kind in constants.HTS_REQ_PORT:
9040
      network_port = self.cfg.AllocatePort()
9041
    else:
9042
      network_port = None
9043

    
9044
    disks = _GenerateDiskTemplate(self,
9045
                                  self.op.disk_template,
9046
                                  instance, pnode_name,
9047
                                  self.secondaries,
9048
                                  self.disks,
9049
                                  self.instance_file_storage_dir,
9050
                                  self.op.file_driver,
9051
                                  0,
9052
                                  feedback_fn)
9053

    
9054
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9055
                            primary_node=pnode_name,
9056
                            nics=self.nics, disks=disks,
9057
                            disk_template=self.op.disk_template,
9058
                            admin_up=False,
9059
                            network_port=network_port,
9060
                            beparams=self.op.beparams,
9061
                            hvparams=self.op.hvparams,
9062
                            hypervisor=self.op.hypervisor,
9063
                            osparams=self.op.osparams,
9064
                            )
9065

    
9066
    if self.op.tags:
9067
      for tag in self.op.tags:
9068
        iobj.AddTag(tag)
9069

    
9070
    if self.adopt_disks:
9071
      if self.op.disk_template == constants.DT_PLAIN:
9072
        # rename LVs to the newly-generated names; we need to construct
9073
        # 'fake' LV disks with the old data, plus the new unique_id
9074
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9075
        rename_to = []
9076
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9077
          rename_to.append(t_dsk.logical_id)
9078
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9079
          self.cfg.SetDiskID(t_dsk, pnode_name)
9080
        result = self.rpc.call_blockdev_rename(pnode_name,
9081
                                               zip(tmp_disks, rename_to))
9082
        result.Raise("Failed to rename adoped LVs")
9083
    else:
9084
      feedback_fn("* creating instance disks...")
9085
      try:
9086
        _CreateDisks(self, iobj)
9087
      except errors.OpExecError:
9088
        self.LogWarning("Device creation failed, reverting...")
9089
        try:
9090
          _RemoveDisks(self, iobj)
9091
        finally:
9092
          self.cfg.ReleaseDRBDMinors(instance)
9093
          raise
9094

    
9095
    feedback_fn("adding instance %s to cluster config" % instance)
9096

    
9097
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9098

    
9099
    # Declare that we don't want to remove the instance lock anymore, as we've
9100
    # added the instance to the config
9101
    del self.remove_locks[locking.LEVEL_INSTANCE]
9102

    
9103
    if self.op.mode == constants.INSTANCE_IMPORT:
9104
      # Release unused nodes
9105
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9106
    else:
9107
      # Release all nodes
9108
      _ReleaseLocks(self, locking.LEVEL_NODE)
9109

    
9110
    disk_abort = False
9111
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9112
      feedback_fn("* wiping instance disks...")
9113
      try:
9114
        _WipeDisks(self, iobj)
9115
      except errors.OpExecError, err:
9116
        logging.exception("Wiping disks failed")
9117
        self.LogWarning("Wiping instance disks failed (%s)", err)
9118
        disk_abort = True
9119

    
9120
    if disk_abort:
9121
      # Something is already wrong with the disks, don't do anything else
9122
      pass
9123
    elif self.op.wait_for_sync:
9124
      disk_abort = not _WaitForSync(self, iobj)
9125
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9126
      # make sure the disks are not degraded (still sync-ing is ok)
9127
      feedback_fn("* checking mirrors status")
9128
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9129
    else:
9130
      disk_abort = False
9131

    
9132
    if disk_abort:
9133
      _RemoveDisks(self, iobj)
9134
      self.cfg.RemoveInstance(iobj.name)
9135
      # Make sure the instance lock gets removed
9136
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9137
      raise errors.OpExecError("There are some degraded disks for"
9138
                               " this instance")
9139

    
9140
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9141
      if self.op.mode == constants.INSTANCE_CREATE:
9142
        if not self.op.no_install:
9143
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9144
                        not self.op.wait_for_sync)
9145
          if pause_sync:
9146
            feedback_fn("* pausing disk sync to install instance OS")
9147
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9148
                                                              iobj.disks, True)
9149
            for idx, success in enumerate(result.payload):
9150
              if not success:
9151
                logging.warn("pause-sync of instance %s for disk %d failed",
9152
                             instance, idx)
9153

    
9154
          feedback_fn("* running the instance OS create scripts...")
9155
          # FIXME: pass debug option from opcode to backend
9156
          os_add_result = \
9157
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
9158
                                          self.op.debug_level)
9159
          if pause_sync:
9160
            feedback_fn("* resuming disk sync")
9161
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9162
                                                              iobj.disks, False)
9163
            for idx, success in enumerate(result.payload):
9164
              if not success:
9165
                logging.warn("resume-sync of instance %s for disk %d failed",
9166
                             instance, idx)
9167

    
9168
          os_add_result.Raise("Could not add os for instance %s"
9169
                              " on node %s" % (instance, pnode_name))
9170

    
9171
      elif self.op.mode == constants.INSTANCE_IMPORT:
9172
        feedback_fn("* running the instance OS import scripts...")
9173

    
9174
        transfers = []
9175

    
9176
        for idx, image in enumerate(self.src_images):
9177
          if not image:
9178
            continue
9179

    
9180
          # FIXME: pass debug option from opcode to backend
9181
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9182
                                             constants.IEIO_FILE, (image, ),
9183
                                             constants.IEIO_SCRIPT,
9184
                                             (iobj.disks[idx], idx),
9185
                                             None)
9186
          transfers.append(dt)
9187

    
9188
        import_result = \
9189
          masterd.instance.TransferInstanceData(self, feedback_fn,
9190
                                                self.op.src_node, pnode_name,
9191
                                                self.pnode.secondary_ip,
9192
                                                iobj, transfers)
9193
        if not compat.all(import_result):
9194
          self.LogWarning("Some disks for instance %s on node %s were not"
9195
                          " imported successfully" % (instance, pnode_name))
9196

    
9197
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9198
        feedback_fn("* preparing remote import...")
9199
        # The source cluster will stop the instance before attempting to make a
9200
        # connection. In some cases stopping an instance can take a long time,
9201
        # hence the shutdown timeout is added to the connection timeout.
9202
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9203
                           self.op.source_shutdown_timeout)
9204
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9205

    
9206
        assert iobj.primary_node == self.pnode.name
9207
        disk_results = \
9208
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9209
                                        self.source_x509_ca,
9210
                                        self._cds, timeouts)
9211
        if not compat.all(disk_results):
9212
          # TODO: Should the instance still be started, even if some disks
9213
          # failed to import (valid for local imports, too)?
9214
          self.LogWarning("Some disks for instance %s on node %s were not"
9215
                          " imported successfully" % (instance, pnode_name))
9216

    
9217
        # Run rename script on newly imported instance
9218
        assert iobj.name == instance
9219
        feedback_fn("Running rename script for %s" % instance)
9220
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9221
                                                   self.source_instance_name,
9222
                                                   self.op.debug_level)
9223
        if result.fail_msg:
9224
          self.LogWarning("Failed to run rename script for %s on node"
9225
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9226

    
9227
      else:
9228
        # also checked in the prereq part
9229
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9230
                                     % self.op.mode)
9231

    
9232
    if self.op.start:
9233
      iobj.admin_up = True
9234
      self.cfg.Update(iobj, feedback_fn)
9235
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9236
      feedback_fn("* starting instance...")
9237
      result = self.rpc.call_instance_start(pnode_name, iobj,
9238
                                            None, None, False)
9239
      result.Raise("Could not start instance")
9240

    
9241
    return list(iobj.all_nodes)
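# A hedged sketch of how LUInstanceCreate is typically driven; the CLI/RAPI
# layers normally build the opcode, and all names and sizes below are made up:
#
#   op = opcodes.OpInstanceCreate(instance_name="web1.example.com",
#                                 mode=constants.INSTANCE_CREATE,
#                                 disk_template=constants.DT_DRBD8,
#                                 disks=[{constants.IDISK_SIZE: 10240}],
#                                 nics=[{}],
#                                 os_type="debootstrap+default",
#                                 pnode="node1", snode="node2")
#   # ... then submitted through the job queue, e.g. with cli.SubmitOpCode(op)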


class LUInstanceConsole(NoHooksLU):
9245
  """Connect to an instance's console.
9246

9247
  This is somewhat special in that it returns the command line that
9248
  you need to run on the master node in order to connect to the
9249
  console.
9250

9251
  """
9252
  REQ_BGL = False
  def ExpandNames(self):
9255
    self._ExpandAndLockInstance()
  def CheckPrereq(self):
9258
    """Check prerequisites.
9259

9260
    This checks that the instance is in the cluster.
9261

9262
    """
9263
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9264
    assert self.instance is not None, \
9265
      "Cannot retrieve locked instance %s" % self.op.instance_name
9266
    _CheckNodeOnline(self, self.instance.primary_node)
  def Exec(self, feedback_fn):
9269
    """Connect to the console of an instance
9270

9271
    """
9272
    instance = self.instance
9273
    node = instance.primary_node
9274

    
9275
    node_insts = self.rpc.call_instance_list([node],
9276
                                             [instance.hypervisor])[node]
9277
    node_insts.Raise("Can't get node information from %s" % node)
9278

    
9279
    if instance.name not in node_insts.payload:
9280
      if instance.admin_up:
9281
        state = constants.INSTST_ERRORDOWN
9282
      else:
9283
        state = constants.INSTST_ADMINDOWN
9284
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9285
                               (instance.name, state))
9286

    
9287
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9288

    
9289
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
9293
  """Returns console information for an instance.
9294

9295
  @type cluster: L{objects.Cluster}
9296
  @type instance: L{objects.Instance}
9297
  @rtype: dict
9298

9299
  """
9300
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9301
  # beparams and hvparams are passed separately, to avoid editing the
9302
  # instance and then saving the defaults in the instance itself.
9303
  hvparams = cluster.FillHV(instance)
9304
  beparams = cluster.FillBE(instance)
9305
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9306

    
9307
  assert console.instance == instance.name
9308
  assert console.Validate()
9309

    
9310
  return console.ToDict()
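# Hedged usage sketch for the helper above; callers normally already have the
# objects from the configuration (instance name is made up):
#
#   cluster = self.cfg.GetClusterInfo()
#   instance = self.cfg.GetInstanceInfo("web1.example.com")
#   console_dict = _GetInstanceConsole(cluster, instance)
#   # e.g. {"instance": "web1.example.com", "kind": "ssh", ...}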


class LUInstanceReplaceDisks(LogicalUnit):
9314
  """Replace the disks of an instance.
9315

9316
  """
9317
  HPATH = "mirrors-replace"
9318
  HTYPE = constants.HTYPE_INSTANCE
9319
  REQ_BGL = False
  def CheckArguments(self):
9322
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9323
                                  self.op.iallocator)
  def ExpandNames(self):
9326
    self._ExpandAndLockInstance()
9327

    
9328
    assert locking.LEVEL_NODE not in self.needed_locks
9329
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9330

    
9331
    assert self.op.iallocator is None or self.op.remote_node is None, \
9332
      "Conflicting options"
9333

    
9334
    if self.op.remote_node is not None:
9335
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9336

    
9337
      # Warning: do not remove the locking of the new secondary here
9338
      # unless DRBD8.AddChildren is changed to work in parallel;
9339
      # currently it doesn't since parallel invocations of
9340
      # FindUnusedMinor will conflict
9341
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9342
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9343
    else:
9344
      self.needed_locks[locking.LEVEL_NODE] = []
9345
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9346

    
9347
      if self.op.iallocator is not None:
9348
        # iallocator will select a new node in the same group
9349
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9350

    
9351
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9352
                                   self.op.iallocator, self.op.remote_node,
9353
                                   self.op.disks, False, self.op.early_release)
9354

    
9355
    self.tasklets = [self.replacer]
9356

    
9357
  def DeclareLocks(self, level):
9358
    if level == locking.LEVEL_NODEGROUP:
9359
      assert self.op.remote_node is None
9360
      assert self.op.iallocator is not None
9361
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9362

    
9363
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9364
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9365
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9366

    
9367
    elif level == locking.LEVEL_NODE:
9368
      if self.op.iallocator is not None:
9369
        assert self.op.remote_node is None
9370
        assert not self.needed_locks[locking.LEVEL_NODE]
9371

    
9372
        # Lock member nodes of all locked groups
9373
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9374
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9375
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9376
      else:
9377
        self._LockInstancesNodes()
9378

    
9379
  def BuildHooksEnv(self):
9380
    """Build hooks env.
9381

9382
    This runs on the master, the primary and all the secondaries.
9383

9384
    """
9385
    instance = self.replacer.instance
9386
    env = {
9387
      "MODE": self.op.mode,
9388
      "NEW_SECONDARY": self.op.remote_node,
9389
      "OLD_SECONDARY": instance.secondary_nodes[0],
9390
      }
9391
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9392
    return env
9393

    
9394
  def BuildHooksNodes(self):
9395
    """Build hooks nodes.
9396

9397
    """
9398
    instance = self.replacer.instance
9399
    nl = [
9400
      self.cfg.GetMasterNode(),
9401
      instance.primary_node,
9402
      ]
9403
    if self.op.remote_node is not None:
9404
      nl.append(self.op.remote_node)
9405
    return nl, nl
9406

    
9407
  def CheckPrereq(self):
9408
    """Check prerequisites.
9409

9410
    """
9411
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9412
            self.op.iallocator is None)
9413

    
9414
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9415
    if owned_groups:
9416
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9417

    
9418
    return LogicalUnit.CheckPrereq(self)
9419

    
9420

    
9421
class TLReplaceDisks(Tasklet):
9422
  """Replaces disks for an instance.
9423

9424
  Note: Locking is not within the scope of this class.
9425

9426
  """
9427
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9428
               disks, delay_iallocator, early_release):
9429
    """Initializes this class.
9430

9431
    """
9432
    Tasklet.__init__(self, lu)
9433

    
9434
    # Parameters
9435
    self.instance_name = instance_name
9436
    self.mode = mode
9437
    self.iallocator_name = iallocator_name
9438
    self.remote_node = remote_node
9439
    self.disks = disks
9440
    self.delay_iallocator = delay_iallocator
9441
    self.early_release = early_release
9442

    
9443
    # Runtime data
9444
    self.instance = None
9445
    self.new_node = None
9446
    self.target_node = None
9447
    self.other_node = None
9448
    self.remote_node_info = None
9449
    self.node_secondary_ip = None
9450

    
9451
  @staticmethod
9452
  def CheckArguments(mode, remote_node, iallocator):
9453
    """Helper function for users of this class.
9454

9455
    """
9456
    # check for valid parameter combination
9457
    if mode == constants.REPLACE_DISK_CHG:
9458
      if remote_node is None and iallocator is None:
9459
        raise errors.OpPrereqError("When changing the secondary either an"
9460
                                   " iallocator script must be used or the"
9461
                                   " new node given", errors.ECODE_INVAL)
9462

    
9463
      if remote_node is not None and iallocator is not None:
9464
        raise errors.OpPrereqError("Give either the iallocator or the new"
9465
                                   " secondary, not both", errors.ECODE_INVAL)
9466

    
9467
    elif remote_node is not None or iallocator is not None:
9468
      # Not replacing the secondary
9469
      raise errors.OpPrereqError("The iallocator and new node options can"
9470
                                 " only be used when changing the"
9471
                                 " secondary node", errors.ECODE_INVAL)
  @staticmethod
9474
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9475
    """Compute a new secondary node using an IAllocator.
9476

9477
    """
9478
    ial = IAllocator(lu.cfg, lu.rpc,
9479
                     mode=constants.IALLOCATOR_MODE_RELOC,
9480
                     name=instance_name,
9481
                     relocate_from=list(relocate_from))
9482

    
9483
    ial.Run(iallocator_name)
9484

    
9485
    if not ial.success:
9486
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9487
                                 " %s" % (iallocator_name, ial.info),
9488
                                 errors.ECODE_NORES)
9489

    
9490
    if len(ial.result) != ial.required_nodes:
9491
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9492
                                 " of nodes (%s), required %s" %
9493
                                 (iallocator_name,
9494
                                  len(ial.result), ial.required_nodes),
9495
                                 errors.ECODE_FAULT)
9496

    
9497
    remote_node_name = ial.result[0]
9498

    
9499
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9500
               instance_name, remote_node_name)
9501

    
9502
    return remote_node_name
9503

    
9504
  def _FindFaultyDisks(self, node_name):
9505
    """Wrapper for L{_FindFaultyInstanceDisks}.
9506

9507
    """
9508
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9509
                                    node_name, True)
9510

    
9511
  def _CheckDisksActivated(self, instance):
9512
    """Checks if the instance disks are activated.
9513

9514
    @param instance: The instance to check disks
9515
    @return: True if they are activated, False otherwise
9516

9517
    """
9518
    nodes = instance.all_nodes
9519

    
9520
    for idx, dev in enumerate(instance.disks):
9521
      for node in nodes:
9522
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9523
        self.cfg.SetDiskID(dev, node)
9524

    
9525
        result = self.rpc.call_blockdev_find(node, dev)
9526

    
9527
        if result.offline:
9528
          continue
9529
        elif result.fail_msg or not result.payload:
9530
          return False
9531

    
9532
    return True
9533

    
9534
  def CheckPrereq(self):
9535
    """Check prerequisites.
9536

9537
    This checks that the instance is in the cluster.
9538

9539
    """
9540
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9541
    assert instance is not None, \
9542
      "Cannot retrieve locked instance %s" % self.instance_name
9543

    
9544
    if instance.disk_template != constants.DT_DRBD8:
9545
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9546
                                 " instances", errors.ECODE_INVAL)
9547

    
9548
    if len(instance.secondary_nodes) != 1:
9549
      raise errors.OpPrereqError("The instance has a strange layout,"
9550
                                 " expected one secondary but found %d" %
9551
                                 len(instance.secondary_nodes),
9552
                                 errors.ECODE_FAULT)
9553

    
9554
    if not self.delay_iallocator:
9555
      self._CheckPrereq2()
9556

    
9557
  def _CheckPrereq2(self):
9558
    """Check prerequisites, second part.
9559

9560
    This function should always be part of CheckPrereq. It was separated and is
9561
    now called from Exec because during node evacuation iallocator was only
9562
    called with an unmodified cluster model, not taking planned changes into
9563
    account.
9564

9565
    """
9566
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

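  # Summary of the node roles computed by _CheckPrereq2, per replacement mode:
  #   REPLACE_DISK_PRI:  target_node = primary,       other_node = secondary
  #   REPLACE_DISK_SEC:  target_node = secondary,     other_node = primary
  #   REPLACE_DISK_CHG:  target_node = old secondary, other_node = primary,
  #                      new_node = the remote/iallocator-chosen node
  #   REPLACE_DISK_AUTO: roles depend on which side has faulty disks; if
  #                      neither side does, self.disks ends up empty (no-op)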
  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
9749
    self.lu.LogInfo("Checking volume groups")
9750

    
9751
    vgname = self.cfg.GetVGName()
9752

    
9753
    # Make sure volume group exists on all involved nodes
9754
    results = self.rpc.call_vg_list(nodes)
9755
    if not results:
9756
      raise errors.OpExecError("Can't list volume groups on the nodes")
9757

    
9758
    for node in nodes:
9759
      res = results[node]
9760
      res.Raise("Error checking node %s" % node)
9761
      if vgname not in res.payload:
9762
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9763
                                 (vgname, node))
9764

    
9765
  def _CheckDisksExistence(self, nodes):
9766
    # Check disk existence
9767
    for idx, dev in enumerate(self.instance.disks):
9768
      if idx not in self.disks:
9769
        continue
9770

    
9771
      for node in nodes:
9772
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9773
        self.cfg.SetDiskID(dev, node)
9774

    
9775
        result = self.rpc.call_blockdev_find(node, dev)
9776

    
9777
        msg = result.fail_msg
9778
        if msg or not result.payload:
9779
          if not msg:
9780
            msg = "disk not found"
9781
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9782
                                   (idx, node, msg))
9783

    
9784
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9785
    for idx, dev in enumerate(self.instance.disks):
9786
      if idx not in self.disks:
9787
        continue
9788

    
9789
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9790
                      (idx, node_name))
9791

    
9792
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9793
                                   ldisk=ldisk):
9794
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9795
                                 " replace disks for instance %s" %
9796
                                 (node_name, self.instance.name))
9797

    
9798
  def _CreateNewStorage(self, node_name):
9799
    """Create new storage on the primary or secondary node.
9800

9801
    This is only used for same-node replaces, not for changing the
9802
    secondary node, hence we don't want to modify the existing disk.
9803

9804
    """
9805
    iv_names = {}
9806

    
9807
    for idx, dev in enumerate(self.instance.disks):
9808
      if idx not in self.disks:
9809
        continue
9810

    
9811
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9812

    
9813
      self.cfg.SetDiskID(dev, node_name)
9814

    
9815
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9816
      names = _GenerateUniqueNames(self.lu, lv_names)
9817

    
9818
      vg_data = dev.children[0].logical_id[0]
9819
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9820
                             logical_id=(vg_data, names[0]))
9821
      vg_meta = dev.children[1].logical_id[0]
9822
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9823
                             logical_id=(vg_meta, names[1]))
9824

    
9825
      new_lvs = [lv_data, lv_meta]
9826
      old_lvs = [child.Copy() for child in dev.children]
9827
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9828

    
9829
      # we pass force_create=True to force the LVM creation
9830
      for new_lv in new_lvs:
9831
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9832
                        _GetInstanceInfoText(self.instance), False)
9833

    
9834
    return iv_names
9835

    
9836
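  # The mapping returned by _CreateNewStorage() is keyed by the DRBD disk's
  # iv_name; a sketch of a single entry, with hypothetical LV objects, is:
  #   iv_names["disk/0"] = (drbd_disk,
  #                         [old_data_lv, old_meta_lv],   # removed later
  #                         [new_data_lv, new_meta_lv])   # just created
  # (_ExecDrbd8Secondary builds a similar mapping, keyed by disk index, with
  # the new network logical_id as the third element instead.)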
  def _CheckDevices(self, node_name, iv_names):
9837
    for name, (dev, _, _) in iv_names.iteritems():
9838
      self.cfg.SetDiskID(dev, node_name)
9839

    
9840
      result = self.rpc.call_blockdev_find(node_name, dev)
9841

    
9842
      msg = result.fail_msg
9843
      if msg or not result.payload:
9844
        if not msg:
9845
          msg = "disk not found"
9846
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
9847
                                 (name, msg))
9848

    
9849
      if result.payload.is_degraded:
9850
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
9851

    
9852
  def _RemoveOldStorage(self, node_name, iv_names):
9853
    for name, (_, old_lvs, _) in iv_names.iteritems():
9854
      self.lu.LogInfo("Remove logical volumes for %s" % name)
9855

    
9856
      for lv in old_lvs:
9857
        self.cfg.SetDiskID(lv, node_name)
9858

    
9859
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9860
        if msg:
9861
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
9862
                             hint="remove unused LVs manually")
9863

    
9864
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
9865
    """Replace a disk on the primary or secondary for DRBD 8.
9866

9867
    The algorithm for replace is quite complicated:
9868

9869
      1. for each disk to be replaced:
9870

9871
        1. create new LVs on the target node with unique names
9872
        1. detach old LVs from the drbd device
9873
        1. rename old LVs to name_replaced.<time_t>
9874
        1. rename new LVs to old LVs
9875
        1. attach the new LVs (with the old names now) to the drbd device
9876

9877
      1. wait for sync across all devices
9878

9879
      1. for each modified disk:
9880

9881
        1. remove old LVs (which have the name name_replaced.<time_t>)
9882

9883
    Failures are not very well handled.
9884

9885
    """
9886
    steps_total = 6
9887

    
9888
    # Step: check device activation
9889
    self.lu.LogStep(1, steps_total, "Check device existence")
9890
    self._CheckDisksExistence([self.other_node, self.target_node])
9891
    self._CheckVolumeGroup([self.target_node, self.other_node])
9892

    
9893
    # Step: check other node consistency
9894
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9895
    self._CheckDisksConsistency(self.other_node,
9896
                                self.other_node == self.instance.primary_node,
9897
                                False)
9898

    
9899
    # Step: create new storage
9900
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9901
    iv_names = self._CreateNewStorage(self.target_node)
9902

    
9903
    # Step: for each lv, detach+rename*2+attach
9904
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9905
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9906
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9907

    
9908
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9909
                                                     old_lvs)
9910
      result.Raise("Can't detach drbd from local storage on node"
9911
                   " %s for device %s" % (self.target_node, dev.iv_name))
9912
      #dev.children = []
9913
      #cfg.Update(instance)
9914

    
9915
      # ok, we created the new LVs, so now we know we have the needed
9916
      # storage; as such, we proceed on the target node to rename
9917
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9918
      # using the assumption that logical_id == physical_id (which in
9919
      # turn is the unique_id on that node)
9920

    
9921
      # FIXME(iustin): use a better name for the replaced LVs
9922
      temp_suffix = int(time.time())
9923
      ren_fn = lambda d, suff: (d.physical_id[0],
9924
                                d.physical_id[1] + "_replaced-%s" % suff)
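      # Illustration with hypothetical names: an old data LV whose
      # physical_id is ("xenvg", "disk0_data") would be renamed by ren_fn to
      # ("xenvg", "disk0_data_replaced-1234567890"), i.e. the volume group is
      # kept and only the LV name gets the temporary suffix.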
9925

    
9926
      # Build the rename list based on what LVs exist on the node
9927
      rename_old_to_new = []
9928
      for to_ren in old_lvs:
9929
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9930
        if not result.fail_msg and result.payload:
9931
          # device exists
9932
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9933

    
9934
      self.lu.LogInfo("Renaming the old LVs on the target node")
9935
      result = self.rpc.call_blockdev_rename(self.target_node,
9936
                                             rename_old_to_new)
9937
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9938

    
9939
      # Now we rename the new LVs to the old LVs
9940
      self.lu.LogInfo("Renaming the new LVs on the target node")
9941
      rename_new_to_old = [(new, old.physical_id)
9942
                           for old, new in zip(old_lvs, new_lvs)]
9943
      result = self.rpc.call_blockdev_rename(self.target_node,
9944
                                             rename_new_to_old)
9945
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9946

    
9947
      # Intermediate steps of in memory modifications
9948
      for old, new in zip(old_lvs, new_lvs):
9949
        new.logical_id = old.logical_id
9950
        self.cfg.SetDiskID(new, self.target_node)
9951

    
9952
      # We need to modify old_lvs so that removal later removes the
9953
      # right LVs, not the newly added ones; note that old_lvs is a
9954
      # copy here
9955
      for disk in old_lvs:
9956
        disk.logical_id = ren_fn(disk, temp_suffix)
9957
        self.cfg.SetDiskID(disk, self.target_node)
9958

    
9959
      # Now that the new lvs have the old name, we can add them to the device
9960
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9961
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9962
                                                  new_lvs)
9963
      msg = result.fail_msg
9964
      if msg:
9965
        for new_lv in new_lvs:
9966
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9967
                                               new_lv).fail_msg
9968
          if msg2:
9969
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
9972
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9973

    
9974
    cstep = 5
9975
    if self.early_release:
9976
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9977
      cstep += 1
9978
      self._RemoveOldStorage(self.target_node, iv_names)
9979
      # WARNING: we release both node locks here, do not do other RPCs
9980
      # than WaitForSync to the primary node
9981
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9982
                    names=[self.target_node, self.other_node])
9983

    
9984
    # Wait for sync
9985
    # This can fail as the old devices are degraded and _WaitForSync
9986
    # does a combined result over all disks, so we don't check its return value
9987
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9988
    cstep += 1
9989
    _WaitForSync(self.lu, self.instance)
9990

    
9991
    # Check all devices manually
9992
    self._CheckDevices(self.instance.primary_node, iv_names)
9993

    
9994
    # Step: remove old storage
9995
    if not self.early_release:
9996
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9997
      cstep += 1
9998
      self._RemoveOldStorage(self.target_node, iv_names)
9999

    
10000
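  # Unlike _ExecDrbd8DiskOnly above, which swaps the LVs underneath the
  # existing DRBD devices, the secondary-replacement path below creates new
  # DRBD minors on the new node, detaches the primary from the old secondary
  # (standalone), reattaches it to the new secondary and only afterwards
  # removes the storage on the old secondary.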
  def _ExecDrbd8Secondary(self, feedback_fn):
10001
    """Replace the secondary node for DRBD 8.
10002

10003
    The algorithm for replace is quite complicated:
10004
      - for all disks of the instance:
10005
        - create new LVs on the new node with same names
10006
        - shutdown the drbd device on the old secondary
10007
        - disconnect the drbd network on the primary
10008
        - create the drbd device on the new secondary
10009
        - network attach the drbd on the primary, using an artifice:
10010
          the drbd code for Attach() will connect to the network if it
10011
          finds a device which is connected to the good local disks but
10012
          not network enabled
10013
      - wait for sync across all devices
10014
      - remove all disks from the old secondary
10015

10016
    Failures are not very well handled.
10017

10018
    """
10019
    steps_total = 6
10020

    
10021
    pnode = self.instance.primary_node
10022

    
10023
    # Step: check device activation
10024
    self.lu.LogStep(1, steps_total, "Check device existence")
10025
    self._CheckDisksExistence([self.instance.primary_node])
10026
    self._CheckVolumeGroup([self.instance.primary_node])
10027

    
10028
    # Step: check other node consistency
10029
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10030
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10031

    
10032
    # Step: create new storage
10033
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10034
    for idx, dev in enumerate(self.instance.disks):
10035
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10036
                      (self.new_node, idx))
10037
      # we pass force_create=True to force LVM creation
10038
      for new_lv in dev.children:
10039
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10040
                        _GetInstanceInfoText(self.instance), False)
10041

    
10042
    # Step 4: drbd minors and drbd setup changes
10043
    # after this, we must manually remove the drbd minors on both the
10044
    # error and the success paths
10045
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10046
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10047
                                         for dev in self.instance.disks],
10048
                                        self.instance.name)
10049
    logging.debug("Allocated minors %r", minors)
10050

    
10051
    iv_names = {}
10052
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10053
      self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
10054
                      (self.new_node, idx))
10055
      # create new devices on new_node; note that we create two IDs:
10056
      # one without port, so the drbd will be activated without
10057
      # networking information on the new node at this stage, and one
10058
      # with network, for the latter activation in step 4
10059
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10060
      if self.instance.primary_node == o_node1:
10061
        p_minor = o_minor1
10062
      else:
10063
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10064
        p_minor = o_minor2
10065

    
10066
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10067
                      p_minor, new_minor, o_secret)
10068
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10069
                    p_minor, new_minor, o_secret)
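      # For reference, a DRBD8 logical_id is the 6-tuple
      #   (node_A, node_B, port, minor_A, minor_B, secret)
      # which is why new_alone_id above carries port=None (the device comes
      # up standalone, without networking) while new_net_id keeps the
      # original port for the attach performed later in this step.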
10070

    
10071
      iv_names[idx] = (dev, dev.children, new_net_id)
10072
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10073
                    new_net_id)
10074
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10075
                              logical_id=new_alone_id,
10076
                              children=dev.children,
10077
                              size=dev.size)
10078
      try:
10079
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10080
                              _GetInstanceInfoText(self.instance), False)
10081
      except errors.GenericError:
10082
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10083
        raise
10084

    
10085
    # We have new devices, shutdown the drbd on the old secondary
10086
    for idx, dev in enumerate(self.instance.disks):
10087
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10088
      self.cfg.SetDiskID(dev, self.target_node)
10089
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10090
      if msg:
10091
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
10093
                           hint=("Please cleanup this device manually as"
10094
                                 " soon as possible"))
10095

    
10096
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10097
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10098
                                               self.instance.disks)[pnode]
10099

    
10100
    msg = result.fail_msg
10101
    if msg:
10102
      # detaches didn't succeed (unlikely)
10103
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10104
      raise errors.OpExecError("Can't detach the disks from the network on"
10105
                               " old node: %s" % (msg,))
10106

    
10107
    # if we managed to detach at least one, we update all the disks of
10108
    # the instance to point to the new secondary
10109
    self.lu.LogInfo("Updating instance configuration")
10110
    for dev, _, new_logical_id in iv_names.itervalues():
10111
      dev.logical_id = new_logical_id
10112
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10113

    
10114
    self.cfg.Update(self.instance, feedback_fn)
10115

    
10116
    # and now perform the drbd attach
10117
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10118
                    " (standalone => connected)")
10119
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10120
                                            self.new_node],
10121
                                           self.node_secondary_ip,
10122
                                           self.instance.disks,
10123
                                           self.instance.name,
10124
                                           False)
10125
    for to_node, to_result in result.items():
10126
      msg = to_result.fail_msg
10127
      if msg:
10128
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10129
                           to_node, msg,
10130
                           hint=("please do a gnt-instance info to see the"
10131
                                 " status of disks"))
10132
    cstep = 5
10133
    if self.early_release:
10134
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10135
      cstep += 1
10136
      self._RemoveOldStorage(self.target_node, iv_names)
10137
      # WARNING: we release all node locks here, do not do other RPCs
10138
      # than WaitForSync to the primary node
10139
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10140
                    names=[self.instance.primary_node,
10141
                           self.target_node,
10142
                           self.new_node])
10143

    
10144
    # Wait for sync
10145
    # This can fail as the old devices are degraded and _WaitForSync
10146
    # does a combined result over all disks, so we don't check its return value
10147
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10148
    cstep += 1
10149
    _WaitForSync(self.lu, self.instance)
10150

    
10151
    # Check all devices manually
10152
    self._CheckDevices(self.instance.primary_node, iv_names)
10153

    
10154
    # Step: remove old storage
10155
    if not self.early_release:
10156
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10157
      self._RemoveOldStorage(self.target_node, iv_names)
10158

    
10159

    
10160
class LURepairNodeStorage(NoHooksLU):
10161
  """Repairs the volume group on a node.
10162

10163
  """
10164
  REQ_BGL = False
10165

    
10166
  def CheckArguments(self):
10167
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10168

    
10169
    storage_type = self.op.storage_type
10170

    
10171
    if (constants.SO_FIX_CONSISTENCY not in
10172
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10173
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10174
                                 " repaired" % storage_type,
10175
                                 errors.ECODE_INVAL)
10176

    
10177
  def ExpandNames(self):
10178
    self.needed_locks = {
10179
      locking.LEVEL_NODE: [self.op.node_name],
10180
      }
10181

    
10182
  def _CheckFaultyDisks(self, instance, node_name):
10183
    """Ensure faulty disks abort the opcode or at least warn."""
10184
    try:
10185
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10186
                                  node_name, True):
10187
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10188
                                   " node '%s'" % (instance.name, node_name),
10189
                                   errors.ECODE_STATE)
10190
    except errors.OpPrereqError, err:
10191
      if self.op.ignore_consistency:
10192
        self.proc.LogWarning(str(err.args[0]))
10193
      else:
10194
        raise
10195

    
10196
  def CheckPrereq(self):
10197
    """Check prerequisites.
10198

10199
    """
10200
    # Check whether any instance on this node has faulty disks
10201
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10202
      if not inst.admin_up:
10203
        continue
10204
      check_nodes = set(inst.all_nodes)
10205
      check_nodes.discard(self.op.node_name)
10206
      for inst_node_name in check_nodes:
10207
        self._CheckFaultyDisks(inst, inst_node_name)
10208

    
10209
  def Exec(self, feedback_fn):
10210
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10211
                (self.op.name, self.op.node_name))
10212

    
10213
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10214
    result = self.rpc.call_storage_execute(self.op.node_name,
10215
                                           self.op.storage_type, st_args,
10216
                                           self.op.name,
10217
                                           constants.SO_FIX_CONSISTENCY)
10218
    result.Raise("Failed to repair storage unit '%s' on %s" %
10219
                 (self.op.name, self.op.node_name))
10220

    
10221

    
10222
class LUNodeEvacuate(NoHooksLU):
10223
  """Evacuates instances off a list of nodes.
10224

10225
  """
10226
  REQ_BGL = False
10227

    
10228
  def CheckArguments(self):
10229
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10230

    
10231
  def ExpandNames(self):
10232
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10233

    
10234
    if self.op.remote_node is not None:
10235
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10236
      assert self.op.remote_node
10237

    
10238
      if self.op.remote_node == self.op.node_name:
10239
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10240
                                   " secondary node", errors.ECODE_INVAL)
10241

    
10242
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10243
        raise errors.OpPrereqError("Without the use of an iallocator only"
10244
                                   " secondary instances can be evacuated",
10245
                                   errors.ECODE_INVAL)
10246

    
10247
    # Declare locks
10248
    self.share_locks = _ShareAll()
10249
    self.needed_locks = {
10250
      locking.LEVEL_INSTANCE: [],
10251
      locking.LEVEL_NODEGROUP: [],
10252
      locking.LEVEL_NODE: [],
10253
      }
10254

    
10255
    if self.op.remote_node is None:
10256
      # Iallocator will choose any node(s) in the same group
10257
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10258
    else:
10259
      group_nodes = frozenset([self.op.remote_node])
10260

    
10261
    # Determine nodes to be locked
10262
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10263

    
10264
  def _DetermineInstances(self):
10265
    """Builds list of instances to operate on.
10266

10267
    """
10268
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10269

    
10270
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10271
      # Primary instances only
10272
      inst_fn = _GetNodePrimaryInstances
10273
      assert self.op.remote_node is None, \
10274
        "Evacuating primary instances requires iallocator"
10275
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10276
      # Secondary instances only
10277
      inst_fn = _GetNodeSecondaryInstances
10278
    else:
10279
      # All instances
10280
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10281
      inst_fn = _GetNodeInstances
10282

    
10283
    return inst_fn(self.cfg, self.op.node_name)
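    # (NEVAC_PRI requires an iallocator, per the assert above; NEVAC_SEC is
    # the only mode allowed with an explicitly given remote node; NEVAC_ALL
    # covers both primary and secondary instances.)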
10284

    
10285
  def DeclareLocks(self, level):
10286
    if level == locking.LEVEL_INSTANCE:
10287
      # Lock instances optimistically, needs verification once node and group
10288
      # locks have been acquired
10289
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10290
        set(i.name for i in self._DetermineInstances())
10291

    
10292
    elif level == locking.LEVEL_NODEGROUP:
10293
      # Lock node groups optimistically, needs verification once nodes have
10294
      # been acquired
10295
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10296
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10297

    
10298
    elif level == locking.LEVEL_NODE:
10299
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10300

    
10301
  def CheckPrereq(self):
10302
    # Verify locks
10303
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10304
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10305
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10306

    
10307
    assert owned_nodes == self.lock_nodes
10308

    
10309
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10310
    if owned_groups != wanted_groups:
10311
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10312
                               " current groups are '%s', used to be '%s'" %
10313
                               (utils.CommaJoin(wanted_groups),
10314
                                utils.CommaJoin(owned_groups)))
10315

    
10316
    # Determine affected instances
10317
    self.instances = self._DetermineInstances()
10318
    self.instance_names = [i.name for i in self.instances]
10319

    
10320
    if set(self.instance_names) != owned_instances:
10321
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10322
                               " were acquired, current instances are '%s',"
10323
                               " used to be '%s'" %
10324
                               (self.op.node_name,
10325
                                utils.CommaJoin(self.instance_names),
10326
                                utils.CommaJoin(owned_instances)))
10327

    
10328
    if self.instance_names:
10329
      self.LogInfo("Evacuating instances from node '%s': %s",
10330
                   self.op.node_name,
10331
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10332
    else:
10333
      self.LogInfo("No instances to evacuate from node '%s'",
10334
                   self.op.node_name)
10335

    
10336
    if self.op.remote_node is not None:
10337
      for i in self.instances:
10338
        if i.primary_node == self.op.remote_node:
10339
          raise errors.OpPrereqError("Node %s is the primary node of"
10340
                                     " instance %s, cannot use it as"
10341
                                     " secondary" %
10342
                                     (self.op.remote_node, i.name),
10343
                                     errors.ECODE_INVAL)
10344

    
10345
  def Exec(self, feedback_fn):
10346
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10347

    
10348
    if not self.instance_names:
10349
      # No instances to evacuate
10350
      jobs = []
10351

    
10352
    elif self.op.iallocator is not None:
10353
      # TODO: Implement relocation to other group
10354
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10355
                       evac_mode=self.op.mode,
10356
                       instances=list(self.instance_names))
10357

    
10358
      ial.Run(self.op.iallocator)
10359

    
10360
      if not ial.success:
10361
        raise errors.OpPrereqError("Can't compute node evacuation using"
10362
                                   " iallocator '%s': %s" %
10363
                                   (self.op.iallocator, ial.info),
10364
                                   errors.ECODE_NORES)
10365

    
10366
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10367

    
10368
    elif self.op.remote_node is not None:
10369
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10370
      jobs = [
10371
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10372
                                        remote_node=self.op.remote_node,
10373
                                        disks=[],
10374
                                        mode=constants.REPLACE_DISK_CHG,
10375
                                        early_release=self.op.early_release)]
10376
        for instance_name in self.instance_names
10377
        ]
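      # With an explicit remote node the iallocator is bypassed entirely:
      # each instance gets its own single-opcode job running replace-disks in
      # REPLACE_DISK_CHG mode towards that node, with the requested
      # early_release setting propagated.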
10378

    
10379
    else:
10380
      raise errors.ProgrammerError("No iallocator or remote node")
10381

    
10382
    return ResultWithJobs(jobs)
10383

    
10384

    
10385
def _SetOpEarlyRelease(early_release, op):
10386
  """Sets C{early_release} flag on opcodes if available.
10387

10388
  """
10389
  try:
10390
    op.early_release = early_release
10391
  except AttributeError:
10392
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10393

    
10394
  return op
10395

    
10396

    
10397
def _NodeEvacDest(use_nodes, group, nodes):
10398
  """Returns group or nodes depending on caller's choice.
10399

10400
  """
10401
  if use_nodes:
10402
    return utils.CommaJoin(nodes)
10403
  else:
10404
    return group
10405

    
10406

    
10407
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10408
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10409

10410
  Used for the iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10412

10413
  @type lu: L{LogicalUnit}
10414
  @param lu: Logical unit instance
10415
  @type alloc_result: tuple/list
10416
  @param alloc_result: Result from iallocator
10417
  @type early_release: bool
10418
  @param early_release: Whether to release locks early if possible
10419
  @type use_nodes: bool
10420
  @param use_nodes: Whether to display node names instead of groups
10421

10422
  """
10423
  (moved, failed, jobs) = alloc_result
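  # Assumed structure of the tuple unpacked above (inferred from how the
  # values are consumed below):
  #   moved:  [(instance_name, target_group, [target_node, ...]), ...]
  #   failed: [(instance_name, failure_reason), ...]
  #   jobs:   [[serialized_opcode, ...], ...] -- one inner list per job; each
  #           element is later revived via opcodes.OpCode.LoadOpCode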
10424

    
10425
  if failed:
10426
    lu.LogWarning("Unable to evacuate instances %s",
10427
                  utils.CommaJoin("%s (%s)" % (name, reason)
10428
                                  for (name, reason) in failed))
10429

    
10430
  if moved:
10431
    lu.LogInfo("Instances to be moved: %s",
10432
               utils.CommaJoin("%s (to %s)" %
10433
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10434
                               for (name, group, nodes) in moved))
10435

    
10436
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10437
              map(opcodes.OpCode.LoadOpCode, ops))
10438
          for ops in jobs]
10439

    
10440

    
10441
class LUInstanceGrowDisk(LogicalUnit):
10442
  """Grow a disk of an instance.
10443

10444
  """
10445
  HPATH = "disk-grow"
10446
  HTYPE = constants.HTYPE_INSTANCE
10447
  REQ_BGL = False
10448

    
10449
  def ExpandNames(self):
10450
    self._ExpandAndLockInstance()
10451
    self.needed_locks[locking.LEVEL_NODE] = []
10452
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10453

    
10454
  def DeclareLocks(self, level):
10455
    if level == locking.LEVEL_NODE:
10456
      self._LockInstancesNodes()
10457

    
10458
  def BuildHooksEnv(self):
10459
    """Build hooks env.
10460

10461
    This runs on the master, the primary and all the secondaries.
10462

10463
    """
10464
    env = {
10465
      "DISK": self.op.disk,
10466
      "AMOUNT": self.op.amount,
10467
      }
10468
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10469
    return env
10470

    
10471
  def BuildHooksNodes(self):
10472
    """Build hooks nodes.
10473

10474
    """
10475
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10476
    return (nl, nl)
10477

    
10478
  def CheckPrereq(self):
10479
    """Check prerequisites.
10480

10481
    This checks that the instance is in the cluster.
10482

10483
    """
10484
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10485
    assert instance is not None, \
10486
      "Cannot retrieve locked instance %s" % self.op.instance_name
10487
    nodenames = list(instance.all_nodes)
10488
    for node in nodenames:
10489
      _CheckNodeOnline(self, node)
10490

    
10491
    self.instance = instance
10492

    
10493
    if instance.disk_template not in constants.DTS_GROWABLE:
10494
      raise errors.OpPrereqError("Instance's disk layout does not support"
10495
                                 " growing", errors.ECODE_INVAL)
10496

    
10497
    self.disk = instance.FindDisk(self.op.disk)
10498

    
10499
    if instance.disk_template not in (constants.DT_FILE,
10500
                                      constants.DT_SHARED_FILE):
10501
      # TODO: check the free disk space for file, when that feature will be
10502
      # supported
10503
      _CheckNodesFreeDiskPerVG(self, nodenames,
10504
                               self.disk.ComputeGrowth(self.op.amount))
10505

    
10506
  def Exec(self, feedback_fn):
10507
    """Execute disk grow.
10508

10509
    """
10510
    instance = self.instance
10511
    disk = self.disk
10512

    
10513
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10514
    if not disks_ok:
10515
      raise errors.OpExecError("Cannot activate block device to grow")
10516

    
10517
    # First run all grow ops in dry-run mode
10518
    for node in instance.all_nodes:
10519
      self.cfg.SetDiskID(disk, node)
10520
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10521
      result.Raise("Grow request failed to node %s" % node)
10522
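    # The pass above is a dry run (the trailing True argument, matching the
    # comment before the loop): each node only verifies that the grow could
    # succeed, and nothing is modified until the second pass below repeats
    # the calls with that flag cleared.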

    
10523
    # We know that (as far as we can test) operations across different
10524
    # nodes will succeed, time to run it for real
10525
    for node in instance.all_nodes:
10526
      self.cfg.SetDiskID(disk, node)
10527
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10528
      result.Raise("Grow request failed to node %s" % node)
10529

    
10530
      # TODO: Rewrite code to work properly
10531
      # DRBD goes into sync mode for a short amount of time after executing the
10532
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10533
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10534
      # time is a work-around.
10535
      time.sleep(5)
10536

    
10537
    disk.RecordGrow(self.op.amount)
10538
    self.cfg.Update(instance, feedback_fn)
10539
    if self.op.wait_for_sync:
10540
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10541
      if disk_abort:
10542
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10543
                             " status; please check the instance")
10544
      if not instance.admin_up:
10545
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10546
    elif not instance.admin_up:
10547
      self.proc.LogWarning("Not shutting down the disk even though the"
                           " instance is not supposed to be running, because"
                           " wait-for-sync was not requested")
10550

    
10551

    
10552
class LUInstanceQueryData(NoHooksLU):
10553
  """Query runtime instance data.
10554

10555
  """
10556
  REQ_BGL = False
10557

    
10558
  def ExpandNames(self):
10559
    self.needed_locks = {}
10560

    
10561
    # Use locking if requested or when non-static information is wanted
10562
    if not (self.op.static or self.op.use_locking):
10563
      self.LogWarning("Non-static data requested, locks need to be acquired")
10564
      self.op.use_locking = True
10565

    
10566
    if self.op.instances or not self.op.use_locking:
10567
      # Expand instance names right here
10568
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10569
    else:
10570
      # Will use acquired locks
10571
      self.wanted_names = None
10572

    
10573
    if self.op.use_locking:
10574
      self.share_locks = _ShareAll()
10575

    
10576
      if self.wanted_names is None:
10577
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10578
      else:
10579
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10580

    
10581
      self.needed_locks[locking.LEVEL_NODE] = []
10582
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10583

    
10584
  def DeclareLocks(self, level):
10585
    if self.op.use_locking and level == locking.LEVEL_NODE:
10586
      self._LockInstancesNodes()
10587

    
10588
  def CheckPrereq(self):
10589
    """Check prerequisites.
10590

10591
    This only checks the optional instance list against the existing names.
10592

10593
    """
10594
    if self.wanted_names is None:
10595
      assert self.op.use_locking, "Locking was not used"
10596
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10597

    
10598
    self.wanted_instances = \
10599
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10600

    
10601
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10602
    """Returns the status of a block device
10603

10604
    """
10605
    if self.op.static or not node:
10606
      return None
10607

    
10608
    self.cfg.SetDiskID(dev, node)
10609

    
10610
    result = self.rpc.call_blockdev_find(node, dev)
10611
    if result.offline:
10612
      return None
10613

    
10614
    result.Raise("Can't compute disk status for %s" % instance_name)
10615

    
10616
    status = result.payload
10617
    if status is None:
10618
      return None
10619

    
10620
    return (status.dev_path, status.major, status.minor,
10621
            status.sync_percent, status.estimated_time,
10622
            status.is_degraded, status.ldisk_status)
10623

    
10624
  def _ComputeDiskStatus(self, instance, snode, dev):
10625
    """Compute block device status.
10626

10627
    """
10628
    if dev.dev_type in constants.LDS_DRBD:
10629
      # we change the snode then (otherwise we use the one passed in)
10630
      if dev.logical_id[0] == instance.primary_node:
10631
        snode = dev.logical_id[1]
10632
      else:
10633
        snode = dev.logical_id[0]
10634

    
10635
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10636
                                              instance.name, dev)
10637
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10638

    
10639
    if dev.children:
10640
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10641
                                        instance, snode),
10642
                         dev.children)
10643
    else:
10644
      dev_children = []
10645

    
10646
    return {
10647
      "iv_name": dev.iv_name,
10648
      "dev_type": dev.dev_type,
10649
      "logical_id": dev.logical_id,
10650
      "physical_id": dev.physical_id,
10651
      "pstatus": dev_pstatus,
10652
      "sstatus": dev_sstatus,
10653
      "children": dev_children,
10654
      "mode": dev.mode,
10655
      "size": dev.size,
10656
      }
10657

    
10658
  def Exec(self, feedback_fn):
10659
    """Gather and return data"""
10660
    result = {}
10661

    
10662
    cluster = self.cfg.GetClusterInfo()
10663

    
10664
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10665
                                          for i in self.wanted_instances)
10666
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10667
      if self.op.static or pnode.offline:
10668
        remote_state = None
10669
        if pnode.offline:
10670
          self.LogWarning("Primary node %s is marked offline, returning static"
10671
                          " information only for instance %s" %
10672
                          (pnode.name, instance.name))
10673
      else:
10674
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10675
                                                  instance.name,
10676
                                                  instance.hypervisor)
10677
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10678
        remote_info = remote_info.payload
10679
        if remote_info and "state" in remote_info:
10680
          remote_state = "up"
10681
        else:
10682
          remote_state = "down"
10683

    
10684
      if instance.admin_up:
10685
        config_state = "up"
10686
      else:
10687
        config_state = "down"
10688

    
10689
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10690
                  instance.disks)
10691

    
10692
      result[instance.name] = {
10693
        "name": instance.name,
10694
        "config_state": config_state,
10695
        "run_state": remote_state,
10696
        "pnode": instance.primary_node,
10697
        "snodes": instance.secondary_nodes,
10698
        "os": instance.os,
10699
        # this happens to be the same format used for hooks
10700
        "nics": _NICListToTuple(self, instance.nics),
10701
        "disk_template": instance.disk_template,
10702
        "disks": disks,
10703
        "hypervisor": instance.hypervisor,
10704
        "network_port": instance.network_port,
10705
        "hv_instance": instance.hvparams,
10706
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10707
        "be_instance": instance.beparams,
10708
        "be_actual": cluster.FillBE(instance),
10709
        "os_instance": instance.osparams,
10710
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10711
        "serial_no": instance.serial_no,
10712
        "mtime": instance.mtime,
10713
        "ctime": instance.ctime,
10714
        "uuid": instance.uuid,
10715
        }
10716

    
10717
    return result
10718

    
10719

    
10720
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
10722

10723
  """
10724
  HPATH = "instance-modify"
10725
  HTYPE = constants.HTYPE_INSTANCE
10726
  REQ_BGL = False
10727

    
10728
  def CheckArguments(self):
10729
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10730
            self.op.hvparams or self.op.beparams or self.op.os_name):
10731
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10732

    
10733
    if self.op.hvparams:
10734
      _CheckGlobalHvParams(self.op.hvparams)
10735

    
10736
    # Disk validation
10737
    disk_addremove = 0
10738
    for disk_op, disk_dict in self.op.disks:
10739
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10740
      if disk_op == constants.DDM_REMOVE:
10741
        disk_addremove += 1
10742
        continue
10743
      elif disk_op == constants.DDM_ADD:
10744
        disk_addremove += 1
10745
      else:
10746
        if not isinstance(disk_op, int):
10747
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10748
        if not isinstance(disk_dict, dict):
10749
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10750
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10751

    
10752
      if disk_op == constants.DDM_ADD:
10753
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10754
        if mode not in constants.DISK_ACCESS_SET:
10755
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10756
                                     errors.ECODE_INVAL)
10757
        size = disk_dict.get(constants.IDISK_SIZE, None)
10758
        if size is None:
10759
          raise errors.OpPrereqError("Required disk parameter size missing",
10760
                                     errors.ECODE_INVAL)
10761
        try:
10762
          size = int(size)
10763
        except (TypeError, ValueError), err:
10764
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10765
                                     str(err), errors.ECODE_INVAL)
10766
        disk_dict[constants.IDISK_SIZE] = size
10767
      else:
10768
        # modification of disk
10769
        if constants.IDISK_SIZE in disk_dict:
10770
          raise errors.OpPrereqError("Disk size change not possible, use"
10771
                                     " grow-disk", errors.ECODE_INVAL)
10772

    
10773
    if disk_addremove > 1:
10774
      raise errors.OpPrereqError("Only one disk add or remove operation"
10775
                                 " supported at a time", errors.ECODE_INVAL)
10776
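    # Hypothetical examples of self.op.disks values that pass the checks
    # above:
    #   [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
    #                         constants.IDISK_MODE: constants.DISK_RDWR})]
    #   [(constants.DDM_REMOVE, {})]
    #   [(0, {constants.IDISK_MODE: constants.DISK_RDWR})]  # modify disk 0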

    
10777
    if self.op.disks and self.op.disk_template is not None:
10778
      raise errors.OpPrereqError("Disk template conversion and other disk"
10779
                                 " changes not supported at the same time",
10780
                                 errors.ECODE_INVAL)
10781

    
10782
    if (self.op.disk_template and
10783
        self.op.disk_template in constants.DTS_INT_MIRROR and
10784
        self.op.remote_node is None):
10785
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10786
                                 " one requires specifying a secondary node",
10787
                                 errors.ECODE_INVAL)
10788

    
10789
    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args["memory"] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested parameter changes against the current instance
    configuration and the state of the involved nodes.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceDown(self, instance, "cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

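    # Parameter handling below keeps two views of each parameter dict: the
    # "inst" variant holds only the explicitly set values (what gets stored in
    # the configuration), while the "new"/"proposed" variant is filled with
    # cluster defaults and is what the validation runs against.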
    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

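    # Memory admission check: the memory considered missing on the primary
    # node is the newly requested memory minus what the instance currently
    # uses (0 if it is not running) and minus the node's reported free memory.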
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

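    # self.op.disks uses the same (operation, parameters) convention as
    # self.op.nics; for additions the parameter dict carries at least the new
    # disk's size, e.g. (hypothetically)
    #   (constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
    #                        constants.IDISK_MODE: constants.DISK_RDWR})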
    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    return

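  # Conversion overview (plain -> drbd8): new DRBD disk objects are generated
  # that reuse the existing LVs as data volumes, the missing meta (and
  # secondary node) volumes are created, the old LVs are renamed to the new
  # logical IDs, the DRBD devices are assembled on both nodes and the disks
  # are then left to sync.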
  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

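  # Conversion overview (drbd8 -> plain): the DRBD data LVs simply become the
  # instance's disks, after which the now unused volumes on the secondary node
  # and the DRBD meta volumes on the primary node are removed.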
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    self.cfg.Update(instance, feedback_fn)

    return result

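  # Dispatch table for disk template conversions; the values are plain
  # functions stored in a dict (not bound methods), so Exec calls them
  # explicitly with (self, feedback_fn).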
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


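# A (hypothetical) invocation through the opcode layer would look roughly like
#   opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                 target_groups=["other-group"])
# The LU itself only computes the required moves via the instance allocator;
# the actual migrations/failovers are performed by the jobs it returns in a
# ResultWithJobs container.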
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


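# LUBackupExport handles both export modes: EXPORT_MODE_LOCAL copies the
# backup to another node of the same cluster, while EXPORT_MODE_REMOTE streams
# the disks to another cluster, authenticated with the X509/HMAC material
# produced by LUBackupPrepare above.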
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node, instance,
                                                None, None, False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
11946
  """Remove exports related to the named instance.
11947

11948
  """
11949
  REQ_BGL = False
11950

    
11951
  def ExpandNames(self):
11952
    self.needed_locks = {}
11953
    # We need all nodes to be locked in order for RemoveExport to work, but we
11954
    # don't need to lock the instance itself, as nothing will happen to it (and
11955
    # we can remove exports also for a removed instance)
11956
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11957

    
11958
  def Exec(self, feedback_fn):
11959
    """Remove any export.
11960

11961
    """
11962
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11963
    # If the instance was not found we'll try with the name that was passed in.
11964
    # This will only work if it was an FQDN, though.
11965
    fqdn_warn = False
11966
    if not instance_name:
11967
      fqdn_warn = True
11968
      instance_name = self.op.instance_name
11969

    
11970
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11971
    exportlist = self.rpc.call_export_list(locked_nodes)
11972
    found = False
11973
    for node in exportlist:
11974
      msg = exportlist[node].fail_msg
11975
      if msg:
11976
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11977
        continue
11978
      if instance_name in exportlist[node].payload:
11979
        found = True
11980
        result = self.rpc.call_export_remove(node, instance_name)
11981
        msg = result.fail_msg
11982
        if msg:
11983
          logging.error("Could not remove export for instance %s"
11984
                        " on node %s: %s", instance_name, node, msg)
11985

    
11986
    if fqdn_warn and not found:
11987
      feedback_fn("Export not found. If trying to remove an export belonging"
11988
                  " to a deleted instance please use its Fully Qualified"
11989
                  " Domain Name.")
11990

    
11991

    
11992
class LUGroupAdd(LogicalUnit):
11993
  """Logical unit for creating node groups.
11994

11995
  """
11996
  HPATH = "group-add"
11997
  HTYPE = constants.HTYPE_GROUP
11998
  REQ_BGL = False
11999

    
12000
  def ExpandNames(self):
12001
    # We need the new group's UUID here so that we can create and acquire the
12002
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12003
    # that it should not check whether the UUID exists in the configuration.
12004
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12005
    self.needed_locks = {}
12006
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12007

    
12008
  def CheckPrereq(self):
12009
    """Check prerequisites.
12010

12011
    This checks that the given group name is not an existing node group
12012
    already.
12013

12014
    """
12015
    try:
12016
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12017
    except errors.OpPrereqError:
12018
      pass
12019
    else:
12020
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12021
                                 " node group (UUID: %s)" %
12022
                                 (self.op.group_name, existing_uuid),
12023
                                 errors.ECODE_EXISTS)
12024

    
12025
    if self.op.ndparams:
12026
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12027

    
12028
  def BuildHooksEnv(self):
12029
    """Build hooks env.
12030

12031
    """
12032
    return {
12033
      "GROUP_NAME": self.op.group_name,
12034
      }
12035

    
12036
  def BuildHooksNodes(self):
12037
    """Build hooks nodes.
12038

12039
    """
12040
    mn = self.cfg.GetMasterNode()
12041
    return ([mn], [mn])
12042

    
12043
  def Exec(self, feedback_fn):
12044
    """Add the node group to the cluster.
12045

12046
    """
12047
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12048
                                  uuid=self.group_uuid,
12049
                                  alloc_policy=self.op.alloc_policy,
12050
                                  ndparams=self.op.ndparams)
12051

    
12052
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12053
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12054

    
12055

    
12056
class LUGroupAssignNodes(NoHooksLU):
12057
  """Logical unit for assigning nodes to groups.
12058

12059
  """
12060
  REQ_BGL = False
12061

    
12062
  def ExpandNames(self):
12063
    # These raise errors.OpPrereqError on their own:
12064
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12065
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12066

    
12067
    # We want to lock all the affected nodes and groups. We have readily
12068
    # available the list of nodes, and the *destination* group. To gather the
12069
    # list of "source" groups, we need to fetch node information later on.
12070
    self.needed_locks = {
12071
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12072
      locking.LEVEL_NODE: self.op.nodes,
12073
      }
12074

    
12075
  def DeclareLocks(self, level):
12076
    if level == locking.LEVEL_NODEGROUP:
12077
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12078

    
12079
      # Try to get all affected nodes' groups without having the group or node
12080
      # lock yet. Needs verification later in the code flow.
12081
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12082

    
12083
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12084

    
12085
  def CheckPrereq(self):
12086
    """Check prerequisites.
12087

12088
    """
12089
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12090
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12091
            frozenset(self.op.nodes))
12092

    
12093
    expected_locks = (set([self.group_uuid]) |
12094
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12095
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12096
    if actual_locks != expected_locks:
12097
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12098
                               " current groups are '%s', used to be '%s'" %
12099
                               (utils.CommaJoin(expected_locks),
12100
                                utils.CommaJoin(actual_locks)))
12101

    
12102
    self.node_data = self.cfg.GetAllNodesInfo()
12103
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12104
    instance_data = self.cfg.GetAllInstancesInfo()
12105

    
12106
    if self.group is None:
12107
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12108
                               (self.op.group_name, self.group_uuid))
12109

    
12110
    (new_splits, previous_splits) = \
12111
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12112
                                             for node in self.op.nodes],
12113
                                            self.node_data, instance_data)
12114

    
12115
    if new_splits:
12116
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12117

    
12118
      if not self.op.force:
12119
        raise errors.OpExecError("The following instances get split by this"
12120
                                 " change and --force was not given: %s" %
12121
                                 fmt_new_splits)
12122
      else:
12123
        self.LogWarning("This operation will split the following instances: %s",
12124
                        fmt_new_splits)
12125

    
12126
        if previous_splits:
12127
          self.LogWarning("In addition, these already-split instances continue"
12128
                          " to be split across groups: %s",
12129
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12130

    
12131
  def Exec(self, feedback_fn):
12132
    """Assign nodes to a new group.
12133

12134
    """
12135
    for node in self.op.nodes:
12136
      self.node_data[node].group = self.group_uuid
12137

    
12138
    # FIXME: Depends on side-effects of modifying the result of
12139
    # C{cfg.GetAllNodesInfo}
12140

    
12141
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12142

    
12143
  @staticmethod
12144
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic
    operation, and returns information about split instances after applying
    the set of changes.

    In particular, it returns information about newly split instances, as
    well as instances that were already split and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR
    are considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.

    """
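    # Example (hypothetical instances/nodes for illustration): with an
    # internally mirrored instance I1 on nodes A and B, both currently in
    # group G1, changes == [(A, G2)] reports I1 as newly split, whereas
    # changes == [(A, G2), (B, G2)] reports nothing, since both of I1's
    # nodes move together.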
    changed_nodes = dict((node, group) for node, group in changes
12168
                         if node_data[node].group != group)
12169

    
12170
    all_split_instances = set()
12171
    previously_split_instances = set()
12172

    
12173
    def InstanceNodes(instance):
12174
      return [instance.primary_node] + list(instance.secondary_nodes)
12175

    
12176
    for inst in instance_data.values():
12177
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12178
        continue
12179

    
12180
      instance_nodes = InstanceNodes(inst)
12181

    
12182
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12183
        previously_split_instances.add(inst.name)
12184

    
12185
      if len(set(changed_nodes.get(node, node_data[node].group)
12186
                 for node in instance_nodes)) > 1:
12187
        all_split_instances.add(inst.name)
12188

    
12189
    return (list(all_split_instances - previously_split_instances),
12190
            list(previously_split_instances & all_split_instances))
12191

    
12192

    
12193
class _GroupQuery(_QueryBase):
12194
  FIELDS = query.GROUP_FIELDS
12195

    
12196
  def ExpandNames(self, lu):
12197
    lu.needed_locks = {}
12198

    
12199
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12200
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12201

    
12202
    if not self.names:
12203
      self.wanted = [name_to_uuid[name]
12204
                     for name in utils.NiceSort(name_to_uuid.keys())]
12205
    else:
12206
      # Accept names to be either names or UUIDs.
12207
      missing = []
12208
      self.wanted = []
12209
      all_uuid = frozenset(self._all_groups.keys())
12210

    
12211
      for name in self.names:
12212
        if name in all_uuid:
12213
          self.wanted.append(name)
12214
        elif name in name_to_uuid:
12215
          self.wanted.append(name_to_uuid[name])
12216
        else:
12217
          missing.append(name)
12218

    
12219
      if missing:
12220
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12221
                                   utils.CommaJoin(missing),
12222
                                   errors.ECODE_NOENT)
12223

    
12224
  def DeclareLocks(self, lu, level):
12225
    pass
12226

    
12227
  def _GetQueryData(self, lu):
12228
    """Computes the list of node groups and their attributes.
12229

12230
    """
12231
    do_nodes = query.GQ_NODE in self.requested_data
12232
    do_instances = query.GQ_INST in self.requested_data
12233

    
12234
    group_to_nodes = None
12235
    group_to_instances = None
12236

    
12237
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12238
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12239
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12240
    # instance->node. Hence, we will need to process nodes even if we only need
12241
    # instance information.
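    # As a sketch (hypothetical names): nodes n1 and n2 in group G plus an
    # instance i1 with primary node n1 yield group_to_nodes == {G: ["n1",
    # "n2"]} and group_to_instances == {G: ["i1"]}, built via the
    # intermediate node_to_group == {"n1": G, "n2": G} mapping below.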
12242
    if do_nodes or do_instances:
12243
      all_nodes = lu.cfg.GetAllNodesInfo()
12244
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12245
      node_to_group = {}
12246

    
12247
      for node in all_nodes.values():
12248
        if node.group in group_to_nodes:
12249
          group_to_nodes[node.group].append(node.name)
12250
          node_to_group[node.name] = node.group
12251

    
12252
      if do_instances:
12253
        all_instances = lu.cfg.GetAllInstancesInfo()
12254
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12255

    
12256
        for instance in all_instances.values():
12257
          node = instance.primary_node
12258
          if node in node_to_group:
12259
            group_to_instances[node_to_group[node]].append(instance.name)
12260

    
12261
        if not do_nodes:
12262
          # Do not pass on node information if it was not requested.
12263
          group_to_nodes = None
12264

    
12265
    return query.GroupQueryData([self._all_groups[uuid]
12266
                                 for uuid in self.wanted],
12267
                                group_to_nodes, group_to_instances)
12268

    
12269

    
12270
class LUGroupQuery(NoHooksLU):
12271
  """Logical unit for querying node groups.
12272

12273
  """
12274
  REQ_BGL = False
12275

    
12276
  def CheckArguments(self):
12277
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12278
                          self.op.output_fields, False)
12279

    
12280
  def ExpandNames(self):
12281
    self.gq.ExpandNames(self)
12282

    
12283
  def DeclareLocks(self, level):
12284
    self.gq.DeclareLocks(self, level)
12285

    
12286
  def Exec(self, feedback_fn):
12287
    return self.gq.OldStyleQuery(self)
12288

    
12289

    
12290
class LUGroupSetParams(LogicalUnit):
12291
  """Modifies the parameters of a node group.
12292

12293
  """
12294
  HPATH = "group-modify"
12295
  HTYPE = constants.HTYPE_GROUP
12296
  REQ_BGL = False
12297

    
12298
  def CheckArguments(self):
12299
    all_changes = [
12300
      self.op.ndparams,
12301
      self.op.alloc_policy,
12302
      ]
12303

    
12304
    if all_changes.count(None) == len(all_changes):
12305
      raise errors.OpPrereqError("Please pass at least one modification",
12306
                                 errors.ECODE_INVAL)
12307

    
12308
  def ExpandNames(self):
12309
    # This raises errors.OpPrereqError on its own:
12310
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12311

    
12312
    self.needed_locks = {
12313
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12314
      }
12315

    
12316
  def CheckPrereq(self):
12317
    """Check prerequisites.
12318

12319
    """
12320
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12321

    
12322
    if self.group is None:
12323
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12324
                               (self.op.group_name, self.group_uuid))
12325

    
12326
    if self.op.ndparams:
12327
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12328
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12329
      self.new_ndparams = new_ndparams
12330

    
12331
  def BuildHooksEnv(self):
12332
    """Build hooks env.
12333

12334
    """
12335
    return {
12336
      "GROUP_NAME": self.op.group_name,
12337
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12338
      }
12339

    
12340
  def BuildHooksNodes(self):
12341
    """Build hooks nodes.
12342

12343
    """
12344
    mn = self.cfg.GetMasterNode()
12345
    return ([mn], [mn])
12346

    
12347
  def Exec(self, feedback_fn):
12348
    """Modifies the node group.
12349

12350
    """
12351
    result = []
12352

    
12353
    if self.op.ndparams:
12354
      self.group.ndparams = self.new_ndparams
12355
      result.append(("ndparams", str(self.group.ndparams)))
12356

    
12357
    if self.op.alloc_policy:
12358
      self.group.alloc_policy = self.op.alloc_policy
12359

    
12360
    self.cfg.Update(self.group, feedback_fn)
12361
    return result
12362

    
12363

    
12364
class LUGroupRemove(LogicalUnit):
12365
  HPATH = "group-remove"
12366
  HTYPE = constants.HTYPE_GROUP
12367
  REQ_BGL = False
12368

    
12369
  def ExpandNames(self):
12370
    # This raises errors.OpPrereqError on its own:
12371
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12372
    self.needed_locks = {
12373
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12374
      }
12375

    
12376
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group in the
    cluster.

    """
    # Verify that the group is empty.
12385
    group_nodes = [node.name
12386
                   for node in self.cfg.GetAllNodesInfo().values()
12387
                   if node.group == self.group_uuid]
12388

    
12389
    if group_nodes:
12390
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12391
                                 " nodes: %s" %
12392
                                 (self.op.group_name,
12393
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12394
                                 errors.ECODE_STATE)
12395

    
12396
    # Verify the cluster would not be left group-less.
12397
    if len(self.cfg.GetNodeGroupList()) == 1:
12398
      raise errors.OpPrereqError("Group '%s' is the only group,"
12399
                                 " cannot be removed" %
12400
                                 self.op.group_name,
12401
                                 errors.ECODE_STATE)
12402

    
12403
  def BuildHooksEnv(self):
12404
    """Build hooks env.
12405

12406
    """
12407
    return {
12408
      "GROUP_NAME": self.op.group_name,
12409
      }
12410

    
12411
  def BuildHooksNodes(self):
12412
    """Build hooks nodes.
12413

12414
    """
12415
    mn = self.cfg.GetMasterNode()
12416
    return ([mn], [mn])
12417

    
12418
  def Exec(self, feedback_fn):
12419
    """Remove the node group.
12420

12421
    """
12422
    try:
12423
      self.cfg.RemoveNodeGroup(self.group_uuid)
12424
    except errors.ConfigurationError:
12425
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12426
                               (self.op.group_name, self.group_uuid))
12427

    
12428
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12429

    
12430

    
12431
class LUGroupRename(LogicalUnit):
12432
  HPATH = "group-rename"
12433
  HTYPE = constants.HTYPE_GROUP
12434
  REQ_BGL = False
12435

    
12436
  def ExpandNames(self):
12437
    # This raises errors.OpPrereqError on its own:
12438
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12439

    
12440
    self.needed_locks = {
12441
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12442
      }
12443

    
12444
  def CheckPrereq(self):
12445
    """Check prerequisites.
12446

12447
    Ensures requested new name is not yet used.
12448

12449
    """
12450
    try:
12451
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12452
    except errors.OpPrereqError:
12453
      pass
12454
    else:
12455
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12456
                                 " node group (UUID: %s)" %
12457
                                 (self.op.new_name, new_name_uuid),
12458
                                 errors.ECODE_EXISTS)
12459

    
12460
  def BuildHooksEnv(self):
12461
    """Build hooks env.
12462

12463
    """
12464
    return {
12465
      "OLD_NAME": self.op.group_name,
12466
      "NEW_NAME": self.op.new_name,
12467
      }
12468

    
12469
  def BuildHooksNodes(self):
12470
    """Build hooks nodes.
12471

12472
    """
12473
    mn = self.cfg.GetMasterNode()
12474

    
12475
    all_nodes = self.cfg.GetAllNodesInfo()
12476
    all_nodes.pop(mn, None)
12477

    
12478
    run_nodes = [mn]
12479
    run_nodes.extend(node.name for node in all_nodes.values()
12480
                     if node.group == self.group_uuid)
12481

    
12482
    return (run_nodes, run_nodes)
12483

    
12484
  def Exec(self, feedback_fn):
12485
    """Rename the node group.
12486

12487
    """
12488
    group = self.cfg.GetNodeGroup(self.group_uuid)
12489

    
12490
    if group is None:
12491
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12492
                               (self.op.group_name, self.group_uuid))
12493

    
12494
    group.name = self.op.new_name
12495
    self.cfg.Update(group, feedback_fn)
12496

    
12497
    return self.op.new_name
12498

    
12499

    
12500
class LUGroupEvacuate(LogicalUnit):
12501
  HPATH = "group-evacuate"
12502
  HTYPE = constants.HTYPE_GROUP
12503
  REQ_BGL = False
12504

    
12505
  def ExpandNames(self):
12506
    # This raises errors.OpPrereqError on its own:
12507
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12508

    
12509
    if self.op.target_groups:
12510
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12511
                                  self.op.target_groups)
12512
    else:
12513
      self.req_target_uuids = []
12514

    
12515
    if self.group_uuid in self.req_target_uuids:
12516
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12517
                                 " as a target group (targets are %s)" %
12518
                                 (self.group_uuid,
12519
                                  utils.CommaJoin(self.req_target_uuids)),
12520
                                 errors.ECODE_INVAL)
12521

    
12522
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12523

    
12524
    self.share_locks = _ShareAll()
12525
    self.needed_locks = {
12526
      locking.LEVEL_INSTANCE: [],
12527
      locking.LEVEL_NODEGROUP: [],
12528
      locking.LEVEL_NODE: [],
12529
      }
12530

    
12531
  def DeclareLocks(self, level):
12532
    if level == locking.LEVEL_INSTANCE:
12533
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12534

    
12535
      # Lock instances optimistically, needs verification once node and group
12536
      # locks have been acquired
12537
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12538
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12539

    
12540
    elif level == locking.LEVEL_NODEGROUP:
12541
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12542

    
12543
      if self.req_target_uuids:
12544
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12545

    
12546
        # Lock all groups used by instances optimistically; this requires going
12547
        # via the node before it's locked, requiring verification later on
12548
        lock_groups.update(group_uuid
12549
                           for instance_name in
12550
                             self.owned_locks(locking.LEVEL_INSTANCE)
12551
                           for group_uuid in
12552
                             self.cfg.GetInstanceNodeGroups(instance_name))
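        # (that verification happens in CheckPrereq below, via
        # _CheckNodeGroupInstances and _CheckInstanceNodeGroups)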
12553
      else:
12554
        # No target groups, need to lock all of them
12555
        lock_groups = locking.ALL_SET
12556

    
12557
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12558

    
12559
    elif level == locking.LEVEL_NODE:
12560
      # This will only lock the nodes in the group to be evacuated which
12561
      # contain actual instances
12562
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12563
      self._LockInstancesNodes()
12564

    
12565
      # Lock all nodes in group to be evacuated and target groups
12566
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12567
      assert self.group_uuid in owned_groups
12568
      member_nodes = [node_name
12569
                      for group in owned_groups
12570
                      for node_name in self.cfg.GetNodeGroup(group).members]
12571
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12572

    
12573
  def CheckPrereq(self):
12574
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12575
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12576
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12577

    
12578
    assert owned_groups.issuperset(self.req_target_uuids)
12579
    assert self.group_uuid in owned_groups
12580

    
12581
    # Check if locked instances are still correct
12582
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12583

    
12584
    # Get instance information
12585
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12586

    
12587
    # Check if node groups for locked instances are still correct
12588
    for instance_name in owned_instances:
12589
      inst = self.instances[instance_name]
12590
      assert owned_nodes.issuperset(inst.all_nodes), \
12591
        "Instance %s's nodes changed while we kept the lock" % instance_name
12592

    
12593
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12594
                                             owned_groups)
12595

    
12596
      assert self.group_uuid in inst_groups, \
12597
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12598

    
12599
    if self.req_target_uuids:
12600
      # User requested specific target groups
12601
      self.target_uuids = self.req_target_uuids
12602
    else:
12603
      # All groups except the one to be evacuated are potential targets
12604
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12605
                           if group_uuid != self.group_uuid]
12606

    
12607
      if not self.target_uuids:
12608
        raise errors.OpPrereqError("There are no possible target groups",
12609
                                   errors.ECODE_INVAL)
12610

    
12611
  def BuildHooksEnv(self):
12612
    """Build hooks env.
12613

12614
    """
12615
    return {
12616
      "GROUP_NAME": self.op.group_name,
12617
      "TARGET_GROUPS": " ".join(self.target_uuids),
12618
      }
12619

    
12620
  def BuildHooksNodes(self):
12621
    """Build hooks nodes.
12622

12623
    """
12624
    mn = self.cfg.GetMasterNode()
12625

    
12626
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12627

    
12628
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12629

    
12630
    return (run_nodes, run_nodes)
12631

    
12632
  def Exec(self, feedback_fn):
12633
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12634

    
12635
    assert self.group_uuid not in self.target_uuids
12636

    
12637
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12638
                     instances=instances, target_groups=self.target_uuids)
12639

    
12640
    ial.Run(self.op.iallocator)
12641

    
12642
    if not ial.success:
12643
      raise errors.OpPrereqError("Can't compute group evacuation using"
12644
                                 " iallocator '%s': %s" %
12645
                                 (self.op.iallocator, ial.info),
12646
                                 errors.ECODE_NORES)
12647

    
12648
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12649

    
12650
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12651
                 len(jobs), self.op.group_name)
12652

    
12653
    return ResultWithJobs(jobs)
12654

    
12655

    
12656
class TagsLU(NoHooksLU): # pylint: disable=W0223
12657
  """Generic tags LU.
12658

12659
  This is an abstract class which is the parent of all the other tags LUs.
12660

12661
  """
12662
  def ExpandNames(self):
12663
    self.group_uuid = None
12664
    self.needed_locks = {}
12665
    if self.op.kind == constants.TAG_NODE:
12666
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12667
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12668
    elif self.op.kind == constants.TAG_INSTANCE:
12669
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12670
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12671
    elif self.op.kind == constants.TAG_NODEGROUP:
12672
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12673

    
12674
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12675
    # not possible to acquire the BGL based on opcode parameters)
12676

    
12677
  def CheckPrereq(self):
12678
    """Check prerequisites.
12679

12680
    """
12681
    if self.op.kind == constants.TAG_CLUSTER:
12682
      self.target = self.cfg.GetClusterInfo()
12683
    elif self.op.kind == constants.TAG_NODE:
12684
      self.target = self.cfg.GetNodeInfo(self.op.name)
12685
    elif self.op.kind == constants.TAG_INSTANCE:
12686
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12687
    elif self.op.kind == constants.TAG_NODEGROUP:
12688
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12689
    else:
12690
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12691
                                 str(self.op.kind), errors.ECODE_INVAL)
12692

    
12693

    
12694
class LUTagsGet(TagsLU):
12695
  """Returns the tags of a given object.
12696

12697
  """
12698
  REQ_BGL = False
12699

    
12700
  def ExpandNames(self):
12701
    TagsLU.ExpandNames(self)
12702

    
12703
    # Share locks as this is only a read operation
12704
    self.share_locks = _ShareAll()
12705

    
12706
  def Exec(self, feedback_fn):
12707
    """Returns the tag list.
12708

12709
    """
12710
    return list(self.target.GetTags())
12711

    
12712

    
12713
class LUTagsSearch(NoHooksLU):
12714
  """Searches the tags for a given pattern.
12715

12716
  """
12717
  REQ_BGL = False
12718

    
12719
  def ExpandNames(self):
12720
    self.needed_locks = {}
12721

    
12722
  def CheckPrereq(self):
12723
    """Check prerequisites.
12724

12725
    This checks the pattern passed for validity by compiling it.
12726

12727
    """
12728
    try:
12729
      self.re = re.compile(self.op.pattern)
12730
    except re.error, err:
12731
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12732
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12733

    
12734
  def Exec(self, feedback_fn):
12735
    """Returns the tag list.
12736

12737
    """
12738
    cfg = self.cfg
12739
    tgts = [("/cluster", cfg.GetClusterInfo())]
12740
    ilist = cfg.GetAllInstancesInfo().values()
12741
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12742
    nlist = cfg.GetAllNodesInfo().values()
12743
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12744
    tgts.extend(("/nodegroup/%s" % n.name, n)
12745
                for n in cfg.GetAllNodeGroupsInfo().values())
12746
    results = []
12747
    for path, target in tgts:
12748
      for tag in target.GetTags():
12749
        if self.re.search(tag):
12750
          results.append((path, tag))
12751
    return results
12752

    
12753

    
12754
class LUTagsSet(TagsLU):
12755
  """Sets a tag on a given object.
12756

12757
  """
12758
  REQ_BGL = False
12759

    
12760
  def CheckPrereq(self):
12761
    """Check prerequisites.
12762

12763
    This checks the type and length of the tag name and value.
12764

12765
    """
12766
    TagsLU.CheckPrereq(self)
12767
    for tag in self.op.tags:
12768
      objects.TaggableObject.ValidateTag(tag)
12769

    
12770
  def Exec(self, feedback_fn):
12771
    """Sets the tag.
12772

12773
    """
12774
    try:
12775
      for tag in self.op.tags:
12776
        self.target.AddTag(tag)
12777
    except errors.TagError, err:
12778
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
12779
    self.cfg.Update(self.target, feedback_fn)
12780

    
12781

    
12782
class LUTagsDel(TagsLU):
12783
  """Delete a list of tags from a given object.
12784

12785
  """
12786
  REQ_BGL = False
12787

    
12788
  def CheckPrereq(self):
12789
    """Check prerequisites.
12790

12791
    This checks that we have the given tag.
12792

12793
    """
12794
    TagsLU.CheckPrereq(self)
12795
    for tag in self.op.tags:
12796
      objects.TaggableObject.ValidateTag(tag)
12797
    del_tags = frozenset(self.op.tags)
12798
    cur_tags = self.target.GetTags()
12799

    
12800
    diff_tags = del_tags - cur_tags
12801
    if diff_tags:
12802
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
12803
      raise errors.OpPrereqError("Tag(s) %s not found" %
12804
                                 (utils.CommaJoin(diff_names), ),
12805
                                 errors.ECODE_NOENT)
12806

    
12807
  def Exec(self, feedback_fn):
12808
    """Remove the tag from the object.
12809

12810
    """
12811
    for tag in self.op.tags:
12812
      self.target.RemoveTag(tag)
12813
    self.cfg.Update(self.target, feedback_fn)
12814

    
12815

    
12816
class LUTestDelay(NoHooksLU):
12817
  """Sleep for a specified amount of time.
12818

12819
  This LU sleeps on the master and/or nodes for a specified amount of
12820
  time.
12821

12822
  """
12823
  REQ_BGL = False
12824

    
12825
  def ExpandNames(self):
12826
    """Expand names and set required locks.
12827

12828
    This expands the node list, if any.
12829

12830
    """
12831
    self.needed_locks = {}
12832
    if self.op.on_nodes:
12833
      # _GetWantedNodes can be used here, but is not always appropriate to use
12834
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12835
      # more information.
12836
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12837
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12838

    
12839
  def _TestDelay(self):
12840
    """Do the actual sleep.
12841

12842
    """
12843
    if self.op.on_master:
12844
      if not utils.TestDelay(self.op.duration):
12845
        raise errors.OpExecError("Error during master delay test")
12846
    if self.op.on_nodes:
12847
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12848
      for node, node_result in result.items():
12849
        node_result.Raise("Failure during rpc call to node %s" % node)
12850

    
12851
  def Exec(self, feedback_fn):
12852
    """Execute the test delay opcode, with the wanted repetitions.
12853

12854
    """
12855
    if self.op.repeat == 0:
12856
      self._TestDelay()
12857
    else:
12858
      top_value = self.op.repeat - 1
12859
      for i in range(self.op.repeat):
12860
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12861
        self._TestDelay()
12862

    
12863

    
12864
class LUTestJqueue(NoHooksLU):
12865
  """Utility LU to test some aspects of the job queue.
12866

12867
  """
12868
  REQ_BGL = False
12869

    
12870
  # Must be lower than default timeout for WaitForJobChange to see whether it
12871
  # notices changed jobs
12872
  _CLIENT_CONNECT_TIMEOUT = 20.0
12873
  _CLIENT_CONFIRM_TIMEOUT = 60.0
12874

    
12875
  @classmethod
12876
  def _NotifyUsingSocket(cls, cb, errcls):
12877
    """Opens a Unix socket and waits for another program to connect.
12878

12879
    @type cb: callable
12880
    @param cb: Callback to send socket name to client
12881
    @type errcls: class
12882
    @param errcls: Exception class to use for errors
12883

12884
    """
12885
    # Using a temporary directory as there's no easy way to create temporary
12886
    # sockets without writing a custom loop around tempfile.mktemp and
12887
    # socket.bind
12888
    tmpdir = tempfile.mkdtemp()
12889
    try:
12890
      tmpsock = utils.PathJoin(tmpdir, "sock")
12891

    
12892
      logging.debug("Creating temporary socket at %s", tmpsock)
12893
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12894
      try:
12895
        sock.bind(tmpsock)
12896
        sock.listen(1)
12897

    
12898
        # Send details to client
12899
        cb(tmpsock)
12900

    
12901
        # Wait for client to connect before continuing
12902
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12903
        try:
12904
          (conn, _) = sock.accept()
12905
        except socket.error, err:
12906
          raise errcls("Client didn't connect in time (%s)" % err)
12907
      finally:
12908
        sock.close()
12909
    finally:
12910
      # Remove as soon as client is connected
12911
      shutil.rmtree(tmpdir)
12912

    
12913
    # Wait for client to close
12914
    try:
12915
      try:
12916
        # pylint: disable=E1101
12917
        # Instance of '_socketobject' has no ... member
12918
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12919
        conn.recv(1)
12920
      except socket.error, err:
12921
        raise errcls("Client failed to confirm notification (%s)" % err)
12922
    finally:
12923
      conn.close()
12924

    
12925
  def _SendNotification(self, test, arg, sockname):
12926
    """Sends a notification to the client.
12927

12928
    @type test: string
12929
    @param test: Test name
12930
    @param arg: Test argument (depends on test)
12931
    @type sockname: string
12932
    @param sockname: Socket path
12933

12934
    """
12935
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12936

    
12937
  def _Notify(self, prereq, test, arg):
12938
    """Notifies the client of a test.
12939

12940
    @type prereq: bool
12941
    @param prereq: Whether this is a prereq-phase test
12942
    @type test: string
12943
    @param test: Test name
12944
    @param arg: Test argument (depends on test)
12945

12946
    """
12947
    if prereq:
12948
      errcls = errors.OpPrereqError
12949
    else:
12950
      errcls = errors.OpExecError
12951

    
12952
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12953
                                                  test, arg),
12954
                                   errcls)
12955

    
12956
  def CheckArguments(self):
12957
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12958
    self.expandnames_calls = 0
12959

    
12960
  def ExpandNames(self):
12961
    checkargs_calls = getattr(self, "checkargs_calls", 0)
12962
    if checkargs_calls < 1:
12963
      raise errors.ProgrammerError("CheckArguments was not called")
12964

    
12965
    self.expandnames_calls += 1
12966

    
12967
    if self.op.notify_waitlock:
12968
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
12969

    
12970
    self.LogInfo("Expanding names")
12971

    
12972
    # Get lock on master node (just to get a lock, not for a particular reason)
12973
    self.needed_locks = {
12974
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12975
      }
12976

    
12977
  def Exec(self, feedback_fn):
12978
    if self.expandnames_calls < 1:
12979
      raise errors.ProgrammerError("ExpandNames was not called")
12980

    
12981
    if self.op.notify_exec:
12982
      self._Notify(False, constants.JQT_EXEC, None)
12983

    
12984
    self.LogInfo("Executing")
12985

    
12986
    if self.op.log_messages:
12987
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12988
      for idx, msg in enumerate(self.op.log_messages):
12989
        self.LogInfo("Sending log message %s", idx + 1)
12990
        feedback_fn(constants.JQT_MSGPREFIX + msg)
12991
        # Report how many test messages have been sent
12992
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12993

    
12994
    if self.op.fail:
12995
      raise errors.OpExecError("Opcode failure was requested")
12996

    
12997
    return True
12998

    
12999

    
13000
class IAllocator(object):
13001
  """IAllocator framework.
13002

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable=R0902
13014
  # lots of instance attributes
13015

    
13016
  def __init__(self, cfg, rpc, mode, **kwargs):
13017
    self.cfg = cfg
13018
    self.rpc = rpc
13019
    # init buffer variables
13020
    self.in_text = self.out_text = self.in_data = self.out_data = None
13021
    # init all input fields so that pylint is happy
13022
    self.mode = mode
13023
    self.memory = self.disks = self.disk_template = None
13024
    self.os = self.tags = self.nics = self.vcpus = None
13025
    self.hypervisor = None
13026
    self.relocate_from = None
13027
    self.name = None
13028
    self.instances = None
13029
    self.evac_mode = None
13030
    self.target_groups = []
13031
    # computed fields
13032
    self.required_nodes = None
13033
    # init result fields
13034
    self.success = self.info = self.result = None
13035

    
13036
    try:
13037
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13038
    except KeyError:
13039
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13040
                                   " IAllocator" % self.mode)
13041

    
13042
    keyset = [n for (n, _) in keydata]
13043

    
13044
    for key in kwargs:
13045
      if key not in keyset:
13046
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13047
                                     " IAllocator" % key)
13048
      setattr(self, key, kwargs[key])
13049

    
13050
    for key in keyset:
13051
      if key not in kwargs:
13052
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13053
                                     " IAllocator" % key)
13054
    self._BuildInputData(compat.partial(fn, self), keydata)
13055

    
13056
  def _ComputeClusterData(self):
13057
    """Compute the generic allocator input data.
13058

13059
    This is the data that is independent of the actual operation.
13060

13061
    """
13062
    cfg = self.cfg
13063
    cluster_info = cfg.GetClusterInfo()
13064
    # cluster data
13065
    data = {
13066
      "version": constants.IALLOCATOR_VERSION,
13067
      "cluster_name": cfg.GetClusterName(),
13068
      "cluster_tags": list(cluster_info.GetTags()),
13069
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13070
      # we don't have job IDs
13071
      }
13072
    ninfo = cfg.GetAllNodesInfo()
13073
    iinfo = cfg.GetAllInstancesInfo().values()
13074
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13075

    
13076
    # node data
13077
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13078

    
13079
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13080
      hypervisor_name = self.hypervisor
13081
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13082
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13083
    else:
13084
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13085

    
13086
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13087
                                        hypervisor_name)
13088
    node_iinfo = \
13089
      self.rpc.call_all_instances_info(node_list,
13090
                                       cluster_info.enabled_hypervisors)
13091

    
13092
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13093

    
13094
    config_ndata = self._ComputeBasicNodeData(ninfo)
13095
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13096
                                                 i_list, config_ndata)
13097
    assert len(data["nodes"]) == len(ninfo), \
13098
        "Incomplete node data computed"
13099

    
13100
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13101

    
13102
    self.in_data = data
13103

    
13104
  @staticmethod
13105
  def _ComputeNodeGroupData(cfg):
13106
    """Compute node groups data.
13107

13108
    """
13109
    ng = dict((guuid, {
13110
      "name": gdata.name,
13111
      "alloc_policy": gdata.alloc_policy,
13112
      })
13113
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13114

    
13115
    return ng
13116

    
13117
  @staticmethod
13118
  def _ComputeBasicNodeData(node_cfg):
13119
    """Compute global node data.
13120

13121
    @rtype: dict
    @return: a dict mapping each node name to a dict of its config-based
      attributes
13123

13124
    """
13125
    # fill in static (config-based) values
13126
    node_results = dict((ninfo.name, {
13127
      "tags": list(ninfo.GetTags()),
13128
      "primary_ip": ninfo.primary_ip,
13129
      "secondary_ip": ninfo.secondary_ip,
13130
      "offline": ninfo.offline,
13131
      "drained": ninfo.drained,
13132
      "master_candidate": ninfo.master_candidate,
13133
      "group": ninfo.group,
13134
      "master_capable": ninfo.master_capable,
13135
      "vm_capable": ninfo.vm_capable,
13136
      })
13137
      for ninfo in node_cfg.values())
13138

    
13139
    return node_results
13140

    
13141
  @staticmethod
13142
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute node data, augmented with dynamic (runtime) information.
13145

13146
    @param node_results: the basic node structures as filled from the config
13147

13148
    """
13149
    # make a copy of the current dict
13150
    node_results = dict(node_results)
13151
    for nname, nresult in node_data.items():
13152
      assert nname in node_results, "Missing basic data for node %s" % nname
13153
      ninfo = node_cfg[nname]
13154

    
13155
      if not (ninfo.offline or ninfo.drained):
13156
        nresult.Raise("Can't get data for node %s" % nname)
13157
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13158
                                nname)
13159
        remote_info = nresult.payload
13160

    
13161
        for attr in ["memory_total", "memory_free", "memory_dom0",
13162
                     "vg_size", "vg_free", "cpu_total"]:
13163
          if attr not in remote_info:
13164
            raise errors.OpExecError("Node '%s' didn't return attribute"
13165
                                     " '%s'" % (nname, attr))
13166
          if not isinstance(remote_info[attr], int):
13167
            raise errors.OpExecError("Node '%s' returned invalid value"
13168
                                     " for '%s': %s" %
13169
                                     (nname, attr, remote_info[attr]))
13170
        # compute memory used by primary instances
13171
        i_p_mem = i_p_up_mem = 0
13172
        for iinfo, beinfo in i_list:
13173
          if iinfo.primary_node == nname:
13174
            i_p_mem += beinfo[constants.BE_MEMORY]
13175
            if iinfo.name not in node_iinfo[nname].payload:
13176
              i_used_mem = 0
13177
            else:
13178
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13179
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13180
            remote_info["memory_free"] -= max(0, i_mem_diff)
13181

    
13182
            if iinfo.admin_up:
13183
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13184

    
13185
        # compute memory used by instances
13186
        pnr_dyn = {
13187
          "total_memory": remote_info["memory_total"],
13188
          "reserved_memory": remote_info["memory_dom0"],
13189
          "free_memory": remote_info["memory_free"],
13190
          "total_disk": remote_info["vg_size"],
13191
          "free_disk": remote_info["vg_free"],
13192
          "total_cpus": remote_info["cpu_total"],
13193
          "i_pri_memory": i_p_mem,
13194
          "i_pri_up_memory": i_p_up_mem,
13195
          }
13196
        pnr_dyn.update(node_results[nname])
13197
        node_results[nname] = pnr_dyn
13198

    
13199
    return node_results
13200

    
13201
  @staticmethod
13202
  def _ComputeInstanceData(cluster_info, i_list):
13203
    """Compute global instance data.
13204

13205
    """
13206
    instance_data = {}
13207
    for iinfo, beinfo in i_list:
13208
      nic_data = []
13209
      for nic in iinfo.nics:
13210
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13211
        nic_dict = {
13212
          "mac": nic.mac,
13213
          "ip": nic.ip,
13214
          "mode": filled_params[constants.NIC_MODE],
13215
          "link": filled_params[constants.NIC_LINK],
13216
          }
13217
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13218
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13219
        nic_data.append(nic_dict)
13220
      pir = {
13221
        "tags": list(iinfo.GetTags()),
13222
        "admin_up": iinfo.admin_up,
13223
        "vcpus": beinfo[constants.BE_VCPUS],
13224
        "memory": beinfo[constants.BE_MEMORY],
13225
        "os": iinfo.os,
13226
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13227
        "nics": nic_data,
13228
        "disks": [{constants.IDISK_SIZE: dsk.size,
13229
                   constants.IDISK_MODE: dsk.mode}
13230
                  for dsk in iinfo.disks],
13231
        "disk_template": iinfo.disk_template,
13232
        "hypervisor": iinfo.hypervisor,
13233
        }
13234
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13235
                                                 pir["disks"])
13236
      instance_data[iinfo.name] = pir
13237

    
13238
    return instance_data
13239

    
13240
  def _AddNewInstance(self):
13241
    """Add new instance data to allocator structure.
13242

13243
    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.
13245

13246
    The checks for the completeness of the opcode must have already been
13247
    done.
13248

13249
    """
13250
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13251

    
13252
    if self.disk_template in constants.DTS_INT_MIRROR:
13253
      self.required_nodes = 2
13254
    else:
13255
      self.required_nodes = 1
13256

    
13257
    request = {
13258
      "name": self.name,
13259
      "disk_template": self.disk_template,
13260
      "tags": self.tags,
13261
      "os": self.os,
13262
      "vcpus": self.vcpus,
13263
      "memory": self.memory,
13264
      "disks": self.disks,
13265
      "disk_space_total": disk_space,
13266
      "nics": self.nics,
13267
      "required_nodes": self.required_nodes,
13268
      "hypervisor": self.hypervisor,
13269
      }
13270

    
13271
    return request
13272

    
13273
  def _AddRelocateInstance(self):
13274
    """Add relocate instance data to allocator structure.
13275

13276
    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.
13278

13279
    The checks for the completeness of the opcode must have already been
13280
    done.
13281

13282
    """
13283
    instance = self.cfg.GetInstanceInfo(self.name)
13284
    if instance is None:
13285
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13286
                                   " IAllocator" % self.name)
13287

    
13288
    if instance.disk_template not in constants.DTS_MIRRORED:
13289
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13290
                                 errors.ECODE_INVAL)
13291

    
13292
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13293
        len(instance.secondary_nodes) != 1:
13294
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13295
                                 errors.ECODE_STATE)
13296

    
13297
    self.required_nodes = 1
13298
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13299
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13300

    
13301
    request = {
13302
      "name": self.name,
13303
      "disk_space_total": disk_space,
13304
      "required_nodes": self.required_nodes,
13305
      "relocate_from": self.relocate_from,
13306
      }
13307
    return request
13308

    
13309
  def _AddNodeEvacuate(self):
13310
    """Get data for node-evacuate requests.
13311

13312
    """
13313
    return {
13314
      "instances": self.instances,
13315
      "evac_mode": self.evac_mode,
13316
      }
13317

    
13318
  def _AddChangeGroup(self):
    """Get data for change-group requests.
13320

13321
    """
13322
    return {
13323
      "instances": self.instances,
13324
      "target_groups": self.target_groups,
13325
      }
13326

    
13327
  def _BuildInputData(self, fn, keydata):
13328
    """Build input data structures.
13329

13330
    """
13331
    self._ComputeClusterData()
13332

    
13333
    request = fn()
13334
    request["type"] = self.mode
13335
    for keyname, keytype in keydata:
13336
      if keyname not in request:
13337
        raise errors.ProgrammerError("Request parameter %s is missing" %
13338
                                     keyname)
13339
      val = request[keyname]
13340
      if not keytype(val):
13341
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13342
                                     " validation, value %s, expected"
13343
                                     " type %s" % (keyname, val, keytype))
13344
    self.in_data["request"] = request
13345

    
13346
    self.in_text = serializer.Dump(self.in_data)
13347

    
13348
  _STRING_LIST = ht.TListOf(ht.TString)
13349
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13350
     # pylint: disable=E1101
13351
     # Class '...' has no 'OP_ID' member
13352
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13353
                          opcodes.OpInstanceMigrate.OP_ID,
13354
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13355
     })))
13356

    
13357
  _NEVAC_MOVED = \
13358
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13359
                       ht.TItems([ht.TNonEmptyString,
13360
                                  ht.TNonEmptyString,
13361
                                  ht.TListOf(ht.TNonEmptyString),
13362
                                 ])))
13363
  _NEVAC_FAILED = \
13364
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13365
                       ht.TItems([ht.TNonEmptyString,
13366
                                  ht.TMaybeString,
13367
                                 ])))
13368
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13369
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
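  # Per the checks above, a node-evacuation/change-group result must be a
  # 3-tuple of:
  #   - moved:  list of (string, string, [non-empty strings...]) triples
  #   - failed: list of (string, optional string) pairs
  #   - jobs:   list of job definitions, each a list of opcode dicts limited
  #             to failover/migrate/replace-disks (see _JOB_LIST)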
13370

    
13371
  _MODE_DATA = {
13372
    constants.IALLOCATOR_MODE_ALLOC:
13373
      (_AddNewInstance,
13374
       [
13375
        ("name", ht.TString),
13376
        ("memory", ht.TInt),
13377
        ("disks", ht.TListOf(ht.TDict)),
13378
        ("disk_template", ht.TString),
13379
        ("os", ht.TString),
13380
        ("tags", _STRING_LIST),
13381
        ("nics", ht.TListOf(ht.TDict)),
13382
        ("vcpus", ht.TInt),
13383
        ("hypervisor", ht.TString),
13384
        ], ht.TList),
13385
    constants.IALLOCATOR_MODE_RELOC:
13386
      (_AddRelocateInstance,
13387
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13388
       ht.TList),
13389
     constants.IALLOCATOR_MODE_NODE_EVAC:
13390
      (_AddNodeEvacuate, [
13391
        ("instances", _STRING_LIST),
13392
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13393
        ], _NEVAC_RESULT),
13394
     constants.IALLOCATOR_MODE_CHG_GROUP:
13395
      (_AddChangeGroup, [
13396
        ("instances", _STRING_LIST),
13397
        ("target_groups", _STRING_LIST),
13398
        ], _NEVAC_RESULT),
13399
    }
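  # Each _MODE_DATA entry maps a mode to (request builder, required keys with
  # their ht checks, result validator): __init__ requires exactly the listed
  # keys as keyword arguments, _BuildInputData verifies the built request
  # against the paired ht checks, and _ValidateResult applies the validator
  # to the "result" field returned by the script.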
13400

    
13401
  def Run(self, name, validate=True, call_fn=None):
13402
    """Run an instance allocator and return the results.
13403

13404
    """
13405
    if call_fn is None:
13406
      call_fn = self.rpc.call_iallocator_runner
13407

    
13408
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13409
    result.Raise("Failure while running the iallocator script")
13410

    
13411
    self.out_text = result.payload
13412
    if validate:
13413
      self._ValidateResult()
13414

    
13415
  def _ValidateResult(self):
13416
    """Process the allocator results.
13417

13418
    This will process and if successful save the result in
13419
    self.out_data and the other parameters.
13420

13421
    """
13422
    try:
13423
      rdict = serializer.Load(self.out_text)
13424
    except Exception, err:
13425
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13426

    
13427
    if not isinstance(rdict, dict):
13428
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13429

    
13430
    # TODO: remove backwards compatibility in later versions
13431
    if "nodes" in rdict and "result" not in rdict:
13432
      rdict["result"] = rdict["nodes"]
13433
      del rdict["nodes"]
13434

    
13435
    for key in "success", "info", "result":
13436
      if key not in rdict:
13437
        raise errors.OpExecError("Can't parse iallocator results:"
13438
                                 " missing key '%s'" % key)
13439
      setattr(self, key, rdict[key])
13440

    
13441
    if not self._result_check(self.result):
13442
      raise errors.OpExecError("Iallocator returned invalid result,"
13443
                               " expected %s, got %s" %
13444
                               (self._result_check, self.result),
13445
                               errors.ECODE_INVAL)
13446

    
13447
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
13448
      assert self.relocate_from is not None
13449
      assert self.required_nodes == 1
13450

    
13451
      node2group = dict((name, ndata["group"])
13452
                        for (name, ndata) in self.in_data["nodes"].items())
13453

    
13454
      fn = compat.partial(self._NodesToGroups, node2group,
13455
                          self.in_data["nodegroups"])
13456

    
13457
      instance = self.cfg.GetInstanceInfo(self.name)
13458
      request_groups = fn(self.relocate_from + [instance.primary_node])
13459
      result_groups = fn(rdict["result"] + [instance.primary_node])
13460

    
13461
      if self.success and not set(result_groups).issubset(request_groups):
13462
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13463
                                 " differ from original groups (%s)" %
13464
                                 (utils.CommaJoin(result_groups),
13465
                                  utils.CommaJoin(request_groups)))
13466

    
13467
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13468
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13469

    
13470
    self.out_data = rdict
13471

    
13472
  @staticmethod
13473
  def _NodesToGroups(node2group, groups, nodes):
13474
    """Returns a list of unique group names for a list of nodes.
13475

13476
    @type node2group: dict
13477
    @param node2group: Map from node name to group UUID
13478
    @type groups: dict
13479
    @param groups: Group information
13480
    @type nodes: list
13481
    @param nodes: Node names
13482

13483
    """
13484
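    # Example (hypothetical data): with node2group == {"n1": "g1-uuid"} and
    # groups == {"g1-uuid": {"name": "default", ...}}, a nodes list of
    # ["n1", "unknown"] yields ["default"]: unknown nodes are skipped, and a
    # group missing from C{groups} falls back to its UUID.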
    result = set()
13485

    
13486
    for node in nodes:
13487
      try:
13488
        group_uuid = node2group[node]
13489
      except KeyError:
13490
        # Ignore unknown node
13491
        pass
13492
      else:
13493
        try:
13494
          group = groups[group_uuid]
13495
        except KeyError:
13496
          # Can't find group, let's use UUID
13497
          group_name = group_uuid
13498
        else:
13499
          group_name = group["name"]
13500

    
13501
        result.add(group_name)
13502

    
13503
    return sorted(result)
13504

    
13505

    
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

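  # Illustrative sketch, not part of the original opcode definitions: the
  # shape of input that the IALLOCATOR_MODE_ALLOC branch of CheckPrereq above
  # accepts.  The instance name, sizes and values are made up; only the
  # structure matters for the checks.
  #
  #   op.name = "new-instance.example.com"  # must not be an existing instance
  #   op.nics = [{}]
  #   op.disks = [{constants.IDISK_SIZE: 1024,
  #                constants.IDISK_MODE: constants.DISK_RDWR}]
  #   op.memory, op.vcpus, op.os, op.tags, op.disk_template  # must be present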

    
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result

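# Illustrative sketch, not part of the original code: what LUTestAllocator.Exec
# returns for the two test directions.  With IALLOCATOR_DIR_IN the LU only
# builds the allocator request and returns it unchanged; with
# IALLOCATOR_DIR_OUT it also runs the named allocator and returns its raw,
# unvalidated output.
#
#   direction=constants.IALLOCATOR_DIR_IN   -> result = ial.in_text
#   direction=constants.IALLOCATOR_DIR_OUT  -> ial.Run(op.allocator,
#                                                      validate=False)
#                                              result = ial.out_text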

    
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP

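# Illustrative note, not part of the original code: the assert above runs at
# module import time, so registering a new resource in constants.QR_VIA_OP
# without adding a matching entry to _QUERY_IMPL (or vice versa) aborts the
# import instead of failing later inside _GetQueryImplementation.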

    
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
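
# Illustrative sketch, not part of the original code: resolving a query
# implementation and what happens for an unknown resource name.
#
#   _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")  # raises OpPrereqError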