lib/cmdlib.py @ da4fd3b0


#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht

import ganeti.masterd.instance # pylint: disable=W0611


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
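
# Illustrative sketch: an LU's Exec method could hand follow-up work to the
# job queue by returning a ResultWithJobs, for example (the opcode chosen
# here is just a placeholder):
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig()]]
#     return ResultWithJobs(jobs, message="verification queued")
#
# mcpu.Processor._ProcessResult would then submit the job lists in C{jobs}
# and merge the resulting job IDs into the opcode result.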


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and the
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
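
# Illustrative sketch of a minimal concurrent LU following the rules listed in
# the LogicalUnit docstring above; the opcode fields and the LU name are
# placeholders, not part of the real opcode set:
#
#   class LUNodeExample(LogicalUnit):
#     HPATH = "node-example"
#     HTYPE = constants.HTYPE_NODE
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#       self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
#
#     def BuildHooksEnv(self):
#       return {"OP_TARGET": self.op.node_name}
#
#     def BuildHooksNodes(self):
#       return ([self.cfg.GetMasterNode()], [self.op.node_name])
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Doing nothing on %s" % self.op.node_name)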


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
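
# Illustrative sketch of how an LU could delegate its work to tasklets; the
# tasklet class shown here is a placeholder:
#
#   class _ExampleTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass  # verify prerequisites, raise errors.OpPrereqError on failure
#
#     def Exec(self, feedback_fn):
#       feedback_fn("tasklet running")
#
# and, inside the owning LU's ExpandNames:
#
#   self.tasklets = [_ExampleTasklet(self)]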


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)
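
# Illustrative usage: a concurrent LU that only reads data could request all
# of its locks in shared mode, e.g. in its ExpandNames:
#
#   self.share_locks = _ShareAll()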


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
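
# Worked example of the semantics above (values are made up): with
#
#   old_params = {"a": 1, "b": 2}
#   update_dict = {"b": constants.VALUE_DEFAULT, "c": 3}
#
# the call _GetUpdatedParams(old_params, update_dict) returns {"a": 1, "c": 3},
# because "b" is reset to its default (i.e. removed) and "c" is added.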


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
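
# Illustrative usage: after narrowing down which nodes it actually needs, an
# LU could drop the remaining node locks while keeping a specific set (the
# opcode field used here is a placeholder):
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#
# or release everything it holds at that level:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE)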


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance-related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
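
# Illustrative sketch of the resulting environment for an instance with one
# bridged NIC and one disk (all values are made up):
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "node2.example.com",
#     "INSTANCE_OS_TYPE": "debootstrap",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_MEMORY": 128,
#     "INSTANCE_VCPUS": 1,
#     "INSTANCE_DISK_TEMPLATE": "drbd",
#     "INSTANCE_HYPERVISOR": "xen-pvm",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_IP": "", "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#     "INSTANCE_NIC0_MODE": "bridged", "INSTANCE_NIC0_LINK": "xen-br0",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 1024, "INSTANCE_DISK0_MODE": "rw",
#     "INSTANCE_TAGS": "",
#     # plus one INSTANCE_BE_* entry per backend parameter and one
#     # INSTANCE_HV_* entry per hypervisor parameter
#   }
#
# The hooks runner later prefixes each key with "GANETI_".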


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance-related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    return master


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
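
# Illustrative sketch of the two message formats produced by _Error above,
# using made-up values. With the opcode's error_codes flag set, the parseable
# form is emitted:
#
#   "ERROR:ENODEHOOKS:node:node1.example.com:hook failed"
#
# while otherwise the simpler form is used:
#
#   "ERROR: node node1.example.com: hook failed"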
1486

    
1487

    
1488
class LUClusterVerify(NoHooksLU):
1489
  """Submits all jobs necessary to verify the cluster.
1490

1491
  """
1492
  REQ_BGL = False
1493

    
1494
  def ExpandNames(self):
1495
    self.needed_locks = {}
1496

    
1497
  def Exec(self, feedback_fn):
1498
    jobs = []
1499

    
1500
    if self.op.group_name:
1501
      groups = [self.op.group_name]
1502
      depends_fn = lambda: None
1503
    else:
1504
      groups = self.cfg.GetNodeGroupList()
1505

    
1506
      # Verify global configuration
1507
      jobs.append([
1508
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1509
        ])
1510

    
1511
      # Always depend on global verification
1512
      depends_fn = lambda: [(-len(jobs), [])]
1513

    
1514
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1515
                                            ignore_errors=self.op.ignore_errors,
1516
                                            depends=depends_fn())]
1517
                for group in groups)
1518

    
1519
    # Fix up all parameters
1520
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1521
      op.debug_simulate_errors = self.op.debug_simulate_errors
1522
      op.verbose = self.op.verbose
1523
      op.error_codes = self.op.error_codes
1524
      try:
1525
        op.skip_checks = self.op.skip_checks
1526
      except AttributeError:
1527
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1528

    
1529
    return ResultWithJobs(jobs)
1530

    
1531

    
1532
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_all_opt, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]
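    # Reading the entries above: files_all must be on every node,
    # files_all_opt is optional but expected on either all nodes or none
    # (checked further down), files_mc belongs on master candidates plus the
    # master itself, and files_vm only on vm_capable nodes.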

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_all_opt | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload
2823

    
2824
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2825
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2826
      self._VerifyNodeNetwork(node_i, nresult)
2827
      self._VerifyOob(node_i, nresult)
2828

    
2829
      if nimg.vm_capable:
2830
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2831
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2832
                             all_drbd_map)
2833

    
2834
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2835
        self._UpdateNodeInstances(node_i, nresult, nimg)
2836
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2837
        self._UpdateNodeOS(node_i, nresult, nimg)
2838

    
2839
        if not nimg.os_fail:
2840
          if refos_img is None:
2841
            refos_img = nimg
2842
          self._VerifyNodeOS(node_i, nimg, refos_img)
2843
        self._VerifyNodeBridges(node_i, nresult, bridges)
2844

    
2845
        # Check whether all running instancies are primary for the node. (This
2846
        # can no longer be done from _VerifyInstance below, since some of the
2847
        # wrong instances could be from other node groups.)
2848
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2849

    
2850
        for inst in non_primary_inst:
2851
          test = inst in self.all_inst_info
2852
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2853
                   "instance should not run on node %s", node_i.name)
2854
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2855
                   "node is running unknown instance %s", inst)
2856

    
2857
    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
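      # (a "ghost" node is one that is referenced by instances but no longer
      # present in the cluster's node list)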
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
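    # (each inner one-element list below is submitted as its own job, so the
    # per-group verifications are handled independently of each other)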
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}
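    # Illustrative result shapes (hypothetical names):
    #   res_nodes:     {"node3": "error message from the LV listing RPC"}
    #   res_instances: set of instance names that need activate-disks
    #   res_missing:   {"instance1": [("node2", "lv_name")]}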

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(key)

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
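        # (the node reports the device size in bytes, while disk.size in the
        # configuration is kept in MiB, hence the 20-bit shift)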
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_deactivate_master_ip(master)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_activate_master_ip(master)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
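      # mods is a list of (constants.DDM_ADD | constants.DDM_REMOVE, os_name)
      # pairs, applied in order to the cluster attribute named by aname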
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master = self.cfg.GetMasterNode()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master,
                                                        self.op.master_netmask)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        self.LogWarning(msg)
        feedback_fn(msg)
      else:
        self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_activate_master_ip(master)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
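  @rtype: tuple
  @return: (files_all, files_all_opt, files_mc, files_vm), matching the
      categories described in the comments below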

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_activate_master_ip(master)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master = self.cfg.GetMasterNode()
    self.rpc.call_node_deactivate_master_ip(master)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry
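  # The loop below polls blockdev_getmirrorstatus on the primary node, sleeps
  # up to the estimated sync time (capped at 60 seconds) between polls, and
  # gives up after 10 consecutive RPC failures.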

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
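  # Commands listed in _SKIP_MASTER are refused for the master node itself (or
  # silently skipped when no node list was given); see CheckPrereq below.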

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
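    # Expected payload shapes, validated below:
    #   OOB_HEALTH             -> list of (item, status) pairs
    #   OOB_POWER_STATUS       -> dict (incl. OOB_POWER_STATUS_POWERED)
    #   OOB_POWER_ON/OFF/CYCLE -> no payload (None)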
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
4209

4210
    @param rlist: a map with node names as keys and OS objects as values
4211

4212
    @rtype: dict
4213
    @return: a dictionary with osnames as keys and as value another
4214
        map, with nodes as keys and tuples of (path, status, diagnose,
4215
        variants, parameters, api_versions) as values, eg::
4216

4217
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4218
                                     (/srv/..., False, "invalid api")],
4219
                           "node2": [(/srv/..., True, "", [], [])]}
4220
          }
4221

4222
    """
4223
    all_os = {}
4224
    # we build here the list of nodes that didn't fail the RPC (at RPC
4225
    # level), so that nodes with a non-responding node daemon don't
4226
    # make all OSes invalid
4227
    good_nodes = [node_name for node_name in rlist
4228
                  if not rlist[node_name].fail_msg]
4229
    for node_name, nr in rlist.items():
4230
      if nr.fail_msg or not nr.payload:
4231
        continue
4232
      for (name, path, status, diagnose, variants,
4233
           params, api_versions) in nr.payload:
4234
        if name not in all_os:
4235
          # build a list of nodes for this os containing empty lists
4236
          # for each node in node_list
4237
          all_os[name] = {}
4238
          for nname in good_nodes:
4239
            all_os[name][nname] = []
4240
        # convert params from [name, help] to (name, help)
4241
        params = [tuple(v) for v in params]
4242
        all_os[name][node_name].append((path, status, diagnose,
4243
                                        variants, params, api_versions))
4244
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
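
  # For illustration (derived from _BuildFilter above): with names=None and
  # only "name" in the requested fields, the resulting filter is
  #   [qlang.OP_AND,
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #    [qlang.OP_TRUE, "valid"]]
  # so hidden, blacklisted and invalid OSes are excluded by default.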

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
4511

    
4512

    
4513
class LUNodeQuery(NoHooksLU):
4514
  """Logical unit for querying nodes.
4515

4516
  """
4517
  # pylint: disable=W0142
4518
  REQ_BGL = False
4519

    
4520
  def CheckArguments(self):
4521
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4522
                         self.op.output_fields, self.op.use_locking)
4523

    
4524
  def ExpandNames(self):
4525
    self.nq.ExpandNames(self)
4526

    
4527
  def Exec(self, feedback_fn):
4528
    return self.nq.OldStyleQuery(self)
4529

    
4530

    
4531
class LUNodeQueryvols(NoHooksLU):
4532
  """Logical unit for getting volumes on node(s).
4533

4534
  """
4535
  REQ_BGL = False
4536
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4537
  _FIELDS_STATIC = utils.FieldSet("node")
4538

    
4539
  def CheckArguments(self):
4540
    _CheckOutputFields(static=self._FIELDS_STATIC,
4541
                       dynamic=self._FIELDS_DYNAMIC,
4542
                       selected=self.op.output_fields)
4543

    
4544
  def ExpandNames(self):
4545
    self.needed_locks = {}
4546
    self.share_locks[locking.LEVEL_NODE] = 1
4547
    if not self.op.nodes:
4548
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4549
    else:
4550
      self.needed_locks[locking.LEVEL_NODE] = \
4551
        _GetWantedNodes(self, self.op.nodes)
4552

    
4553
  def Exec(self, feedback_fn):
4554
    """Computes the list of nodes and their attributes.
4555

4556
    """
4557
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4558
    volumes = self.rpc.call_node_volumes(nodenames)
4559

    
4560
    ilist = self.cfg.GetAllInstancesInfo()
4561
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4562

    
4563
    output = []
4564
    for node in nodenames:
4565
      nresult = volumes[node]
4566
      if nresult.offline:
4567
        continue
4568
      msg = nresult.fail_msg
4569
      if msg:
4570
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4571
        continue
4572

    
4573
      node_vols = sorted(nresult.payload,
4574
                         key=operator.itemgetter("dev"))
4575

    
4576
      for vol in node_vols:
4577
        node_output = []
4578
        for field in self.op.output_fields:
4579
          if field == "node":
4580
            val = node
4581
          elif field == "phys":
4582
            val = vol["dev"]
4583
          elif field == "vg":
4584
            val = vol["vg"]
4585
          elif field == "name":
4586
            val = vol["name"]
4587
          elif field == "size":
4588
            val = int(float(vol["size"]))
4589
          elif field == "instance":
4590
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4591
          else:
4592
            raise errors.ParameterError(field)
4593
          node_output.append(str(val))
4594

    
4595
        output.append(node_output)
4596

    
4597
    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
4813

    
4814

    
4815
class LUQuery(NoHooksLU):
4816
  """Query for resources/items of a certain kind.
4817

4818
  """
4819
  # pylint: disable=W0142
4820
  REQ_BGL = False
4821

    
4822
  def CheckArguments(self):
4823
    qcls = _GetQueryImplementation(self.op.what)
4824

    
4825
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4826

    
4827
  def ExpandNames(self):
4828
    self.impl.ExpandNames(self)
4829

    
4830
  def DeclareLocks(self, level):
4831
    self.impl.DeclareLocks(self, level)
4832

    
4833
  def Exec(self, feedback_fn):
4834
    return self.impl.NewStyleQuery(self)
4835

    
4836

    
4837
class LUQueryFields(NoHooksLU):
4838
  """Query for resources/items of a certain kind.
4839

4840
  """
4841
  # pylint: disable=W0142
4842
  REQ_BGL = False
4843

    
4844
  def CheckArguments(self):
4845
    self.qcls = _GetQueryImplementation(self.op.what)
4846

    
4847
  def ExpandNames(self):
4848
    self.needed_locks = {}
4849

    
4850
  def Exec(self, feedback_fn):
4851
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4852

    
4853

    
4854
class LUNodeModifyStorage(NoHooksLU):
4855
  """Logical unit for modifying a storage volume on a node.
4856

4857
  """
4858
  REQ_BGL = False
4859

    
4860
  def CheckArguments(self):
4861
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4862

    
4863
    storage_type = self.op.storage_type
4864

    
4865
    try:
4866
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4867
    except KeyError:
4868
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4869
                                 " modified" % storage_type,
4870
                                 errors.ECODE_INVAL)
4871

    
4872
    diff = set(self.op.changes.keys()) - modifiable
4873
    if diff:
4874
      raise errors.OpPrereqError("The following fields can not be modified for"
4875
                                 " storage units of type '%s': %r" %
4876
                                 (storage_type, list(diff)),
4877
                                 errors.ECODE_INVAL)
4878

    
4879
  def ExpandNames(self):
4880
    self.needed_locks = {
4881
      locking.LEVEL_NODE: self.op.node_name,
4882
      }
4883

    
4884
  def Exec(self, feedback_fn):
4885
    """Computes the list of nodes and their attributes.
4886

4887
    """
4888
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4889
    result = self.rpc.call_storage_modify(self.op.node_name,
4890
                                          self.op.storage_type, st_args,
4891
                                          self.op.name, self.op.changes)
4892
    result.Raise("Failed to modify storage unit '%s' on %s" %
4893
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
5166

    
5167

    
5168
class LUNodeSetParams(LogicalUnit):
5169
  """Modifies the parameters of a node.
5170

5171
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5172
      to the node role (as _ROLE_*)
5173
  @cvar _R2F: a dictionary from node role to tuples of flags
5174
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5175

5176
  """
5177
  HPATH = "node-modify"
5178
  HTYPE = constants.HTYPE_NODE
5179
  REQ_BGL = False
5180
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5181
  _F2R = {
5182
    (True, False, False): _ROLE_CANDIDATE,
5183
    (False, True, False): _ROLE_DRAINED,
5184
    (False, False, True): _ROLE_OFFLINE,
5185
    (False, False, False): _ROLE_REGULAR,
5186
    }
5187
  _R2F = dict((v, k) for k, v in _F2R.items())
5188
  _FLAGS = ["master_candidate", "drained", "offline"]
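
  # Example of the mapping above: a node with flags
  # (master_candidate=True, drained=False, offline=False) maps via _F2R to
  # _ROLE_CANDIDATE, and _R2F[_ROLE_CANDIDATE] gives back (True, False, False),
  # which Exec() below unpacks into the corresponding node attributes.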

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
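
  # Example (derived from the code above): setting a node offline and giving
  # it a new secondary IP yields a list of (attribute, new value) pairs such
  # as [("offline", "True"), ("secondary_ip", "192.0.2.10")], where the
  # address is only illustrative.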


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params
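
    # Illustrative shape of the filtered structure (OS and parameter names
    # are examples only):
    #   os_hvp = {"debian-image": {"xen-pvm": {"kernel_path": "/boot/..."}}}
    # i.e. per-OS hypervisor parameter overrides, restricted to the enabled
    # hypervisors.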

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
5605

    
5606

    
5607
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


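# Note (editor): a minimal usage sketch for _AssembleInstanceDisks above,
# mirroring how LUInstanceActivateDisks.Exec and _StartInstanceDisks call it:
#
#   disks_ok, disks_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#
# disks_info is a list of (primary_node, iv_name, device_path) tuples, and
# disks_ok becomes False as soon as any relevant assembly call fails.
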
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before
  calling _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


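# Note (editor): _ExpandCheckDisks above is a small guard helper: passing
# disks=None selects every disk of the instance, while an explicit list is
# only accepted if it is a subset of instance.disks (anything else is treated
# as a programming error, hence errors.ProgrammerError rather than
# errors.OpPrereqError).
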
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


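# Note (editor): in _ShutdownInstanceDisks above, a failed shutdown only makes
# the overall result False when it happens on the primary node (and
# ignore_primary is not set) or on a secondary node that is not marked
# offline; failures on offline secondaries are logged but tolerated.
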
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


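# Note (editor): a typical call to _CheckNodeFreeMemory above, as used by
# LUInstanceStartup.CheckPrereq further down in this module:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
#
# where bep is the instance's filled-in beparams dict.
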
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


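# Note (editor): req_sizes for _CheckNodesFreeDiskPerVG above maps volume
# group names to the space needed in MiB and is checked one VG at a time;
# e.g. (hypothetical values) {"xenvg": 10240, "fastvg": 2048} results in two
# _CheckNodesFreeDiskOnVG calls against the same node list.
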
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs.

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams,
                                            self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


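# Note (editor): in LUInstanceStartup above, one-off hvparams given on the
# opcode are validated by merging them over the cluster-filled values
# (cluster.FillHV(instance)) before being checked with _CheckHVParams; the
# overrides are then passed straight to rpc.call_instance_start and are not
# persisted in the instance's configuration.
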
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


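# Note (editor): the reboot logic above has two paths: for a running instance
# and a soft/hard reboot the work is delegated to rpc.call_instance_reboot on
# the primary node, while a full reboot (or a reboot of a stopped instance) is
# emulated as shutdown (if needed), disk re-activation via _StartInstanceDisks
# and a fresh call_instance_start.
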
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


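# Note (editor): for DRBD8 disks, the logical_id rebuilt in
# LUInstanceRecreateDisks.Exec above is the 6-tuple
# (node_a, node_b, port, minor_a, minor_b, secret); only the two node names
# and the freshly allocated minors change, while the existing port and shared
# secret are kept.
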
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


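# Note (editor): _RemoveInstance above is the common removal path used by
# LUInstanceRemove: it drops the block devices (_RemoveDisks), removes the
# instance from the cluster configuration and finally schedules the instance
# lock for removal via lu.remove_locks; disk removal failures are fatal unless
# ignore_failures is set.
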
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


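# Note (editor): both LUInstanceFailover and LUInstanceMigrate above use the
# same node-locking strategy in DeclareLocks: for externally mirrored disk
# templates (DTS_EXT_MIRROR) with no explicit target node, all node locks are
# taken (locking.ALL_SET) so that an iallocator can later pick the
# destination; otherwise only the nodes actually involved are locked.
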
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance,
                                            None, None, False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
7040
  """Migrate all instances from a node.
7041

7042
  """
7043
  HPATH = "node-migrate"
7044
  HTYPE = constants.HTYPE_NODE
7045
  REQ_BGL = False
7046

    
7047
  def CheckArguments(self):
7048
    pass
7049

    
7050
  def ExpandNames(self):
7051
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7052

    
7053
    self.share_locks = _ShareAll()
7054
    self.needed_locks = {
7055
      locking.LEVEL_NODE: [self.op.node_name],
7056
      }
7057

    
7058
  def BuildHooksEnv(self):
7059
    """Build hooks env.
7060

7061
    This runs on the master, the primary and all the secondaries.
7062

7063
    """
7064
    return {
7065
      "NODE_NAME": self.op.node_name,
7066
      }
7067

    
7068
  def BuildHooksNodes(self):
7069
    """Build hooks nodes.
7070

7071
    """
7072
    nl = [self.cfg.GetMasterNode()]
7073
    return (nl, nl)
7074

    
7075
  def CheckPrereq(self):
7076
    pass
7077

    
7078
  def Exec(self, feedback_fn):
7079
    # Prepare jobs for migration instances
7080
    jobs = [
7081
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7082
                                 mode=self.op.mode,
7083
                                 live=self.op.live,
7084
                                 iallocator=self.op.iallocator,
7085
                                 target_node=self.op.target_node)]
7086
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7087
      ]
7088

    
7089
    # TODO: Run iallocator in this opcode and pass correct placement options to
7090
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7091
    # running the iallocator and the actual migration, a good consistency model
7092
    # will have to be found.
7093

    
7094
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7095
            frozenset([self.op.node_name]))
7096

    
7097
    return ResultWithJobs(jobs)
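    # Sketch of the result shape (illustrative only; the instance names are
    # made up): with two primary instances on the node, "jobs" would look
    # roughly like
    #   [[OpInstanceMigrate(instance_name="inst1", live=..., ...)],
    #    [OpInstanceMigrate(instance_name="inst2", live=..., ...)]]
    # i.e. one single-opcode job per instance, so each migration is submitted
    # and scheduled as its own job.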


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the instance
                          shutdown

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
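
  # Descriptive note: during a live migration the status loop in
  # _ExecMigration polls the source node roughly every
  # _MIGRATION_POLL_INTERVAL second and, while RAM statistics are available,
  # emits a progress line at most every _MIGRATION_FEEDBACK_INTERVAL seconds,
  # e.g. "* memory transfer progress: 42.00 %".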

  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node))

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters is accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.lu.op.iallocator,
                 utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks on node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
        instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, self.instance.hypervisor)
    src_info = nodeinfo[source_node]
    dst_info = nodeinfo[target_node]

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, instance, None, None,
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate logical volume names for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
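
# Illustrative example (not an exact output): for exts == [".disk0", ".disk1"]
# the result is a pair of names of the form "<unique-id>.disk0" and
# "<unique-id>.disk1", where each unique id is a fresh identifier obtained
# from the cluster configuration.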


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
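
# The resulting object tree for one DRBD8-backed disk looks roughly like this
# (sizes in MB; the 128 MB child is the DRBD metadata volume):
#
#   Disk(LD_DRBD8, size=<size>, logical_id=(primary, secondary, port,
#                                           p_minor, s_minor, secret))
#     +- Disk(LD_LV, size=<size>, logical_id=(data_vg, "<name>_data"))
#     +- Disk(LD_LV, size=128,    logical_id=(meta_vg, "<name>_meta"))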


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
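
# Worked example: if 512 MB of a 4096 MB disk have been written in 60 s,
# avg_time is 60 / 512 ~= 0.117 s per MB, so the remaining
# (4096 - 512) = 3584 MB give an ETA of roughly 3584 * 0.117 ~= 420 s.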


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)
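
# Example of the chunk-size computation above (the authoritative values live
# in constants.py; the numbers here are only illustrative): assuming
# MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024, a 4096 MB disk is
# wiped in min(1024, 409.6) -> 409 MB chunks, while a 102400 MB disk is
# capped at 1024 MB chunks.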


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      # accumulate the requested size (plus the per-disk payload) per VG
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = (vgs.get(vg_name, 0) +
                      disk[constants.IDISK_SIZE] + payload)

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
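
# Worked example: for two DRBD8 disks of 10240 MB and 5120 MB, both in volume
# group "xenvg" (the VG name is illustrative), the helper returns
#   {"xenvg": (10240 + 128) + (5120 + 128)} == {"xenvg": 15616}
# i.e. each disk contributes its size plus 128 MB of DRBD metadata to its VG.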


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
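
# Worked example: with disks of 10240 MB and 5120 MB, DT_PLAIN needs
# 10240 + 5120 = 15360 MB in the volume group, while DT_DRBD8 needs
# (10240 + 128) + (5120 + 128) = 15616 MB; file-based and diskless templates
# have no LVM requirement here.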


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt
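
    # Illustrative note (values are made up): a disk specification that adopts
    # an existing volume looks roughly like
    #   {constants.IDISK_SIZE: 10240, constants.IDISK_ADOPT: "<existing-vol>"}
    # The checks above reject requests mixing adopting and non-adopting disks,
    # and refuse adoption for templates outside DTS_MAY_ADOPT, for
    # iallocator-placed instances and for imports.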
8314

    
8315
    # instance name verification
8316
    if self.op.name_check:
8317
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8318
      self.op.instance_name = self.hostname1.name
8319
      # used in CheckPrereq for ip ping check
8320
      self.check_ip = self.hostname1.ip
8321
    else:
8322
      self.check_ip = None
8323

    
8324
    # file storage checks
8325
    if (self.op.file_driver and
8326
        not self.op.file_driver in constants.FILE_DRIVER):
8327
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8328
                                 self.op.file_driver, errors.ECODE_INVAL)
8329

    
8330
    if self.op.disk_template == constants.DT_FILE:
8331
      opcodes.RequireFileStorage()
8332
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8333
      opcodes.RequireSharedFileStorage()
8334

    
8335
    ### Node/iallocator related checks
8336
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8337

    
8338
    if self.op.pnode is not None:
8339
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8340
        if self.op.snode is None:
8341
          raise errors.OpPrereqError("The networked disk templates need"
8342
                                     " a mirror node", errors.ECODE_INVAL)
8343
      elif self.op.snode:
8344
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8345
                        " template")
8346
        self.op.snode = None
8347

    
8348
    self._cds = _GetClusterDomainSecret()
8349

    
8350
    if self.op.mode == constants.INSTANCE_IMPORT:
8351
      # On import force_variant must be True, because if we forced it at
8352
      # initial install, our only chance when importing it back is that it
8353
      # works again!
8354
      self.op.force_variant = True
8355

    
8356
      if self.op.no_install:
8357
        self.LogInfo("No-installation mode has no effect during import")
8358

    
8359
    elif self.op.mode == constants.INSTANCE_CREATE:
8360
      if self.op.os_type is None:
8361
        raise errors.OpPrereqError("No guest OS specified",
8362
                                   errors.ECODE_INVAL)
8363
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8364
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8365
                                   " installation" % self.op.os_type,
8366
                                   errors.ECODE_STATE)
8367
      if self.op.disk_template is None:
8368
        raise errors.OpPrereqError("No disk template specified",
8369
                                   errors.ECODE_INVAL)
8370

    
8371
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8372
      # Check handshake to ensure both clusters have the same domain secret
8373
      src_handshake = self.op.source_handshake
8374
      if not src_handshake:
8375
        raise errors.OpPrereqError("Missing source handshake",
8376
                                   errors.ECODE_INVAL)
8377

    
8378
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8379
                                                           src_handshake)
8380
      if errmsg:
8381
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8382
                                   errors.ECODE_INVAL)
8383

    
8384
      # Load and check source CA
8385
      self.source_x509_ca_pem = self.op.source_x509_ca
8386
      if not self.source_x509_ca_pem:
8387
        raise errors.OpPrereqError("Missing source X509 CA",
8388
                                   errors.ECODE_INVAL)
8389

    
8390
      try:
8391
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8392
                                                    self._cds)
8393
      except OpenSSL.crypto.Error, err:
8394
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8395
                                   (err, ), errors.ECODE_INVAL)
8396

    
8397
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8398
      if errcode is not None:
8399
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8400
                                   errors.ECODE_INVAL)
8401

    
8402
      self.source_x509_ca = cert
8403

    
8404
      src_instance_name = self.op.source_instance_name
8405
      if not src_instance_name:
8406
        raise errors.OpPrereqError("Missing source instance name",
8407
                                   errors.ECODE_INVAL)
8408

    
8409
      self.source_instance_name = \
8410
          netutils.GetHostname(name=src_instance_name).name
8411

    
8412
    else:
8413
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8414
                                 self.op.mode, errors.ECODE_INVAL)
8415

    
8416
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
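    # Illustrative sketch (added comment, not part of the original code):
    # "einfo" is a ConfigParser-style export file; assuming the INISECT_*
    # constants keep their usual meaning, the options read below would
    # look roughly like:
    #
    #   [instance]
    #   disk_template = plain
    #   disk0_size = 1024
    #   nic0_mac = aa:00:00:11:22:33
    #   hypervisor = xen-pvm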
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj,
                                            None, None, False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
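  # Illustrative note (added comment, not part of the original code): the
  # returned dict is the serialized console object; assuming the usual
  # objects.InstanceConsole fields, it would look roughly like
  # {"instance": "inst1.example.com", "kind": "ssh", ...}, with the exact
  # keys depending on the hypervisor's console type.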
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
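    # Illustrative summary (added comment, not part of the original code),
    # assuming the usual replace-disks modes:
    #   - REPLACE_DISK_PRI / REPLACE_DISK_SEC / REPLACE_DISK_AUTO: neither
    #     remote_node nor iallocator may be given;
    #   - REPLACE_DISK_CHG: exactly one of remote_node or iallocator must
    #     be given.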
    # check for valid parameter combination
9467
    if mode == constants.REPLACE_DISK_CHG:
9468
      if remote_node is None and iallocator is None:
9469
        raise errors.OpPrereqError("When changing the secondary either an"
9470
                                   " iallocator script must be used or the"
9471
                                   " new node given", errors.ECODE_INVAL)
9472

    
9473
      if remote_node is not None and iallocator is not None:
9474
        raise errors.OpPrereqError("Give either the iallocator or the new"
9475
                                   " secondary, not both", errors.ECODE_INVAL)
9476

    
9477
    elif remote_node is not None or iallocator is not None:
9478
      # Not replacing the secondary
9479
      raise errors.OpPrereqError("The iallocator and new node options can"
9480
                                 " only be used when changing the"
9481
                                 " secondary node", errors.ECODE_INVAL)
9482

    
9483
  @staticmethod
9484
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9485
    """Compute a new secondary node using an IAllocator.
9486

9487
    """
9488
    ial = IAllocator(lu.cfg, lu.rpc,
9489
                     mode=constants.IALLOCATOR_MODE_RELOC,
9490
                     name=instance_name,
9491
                     relocate_from=list(relocate_from))
9492

    
9493
    ial.Run(iallocator_name)
9494

    
9495
    if not ial.success:
9496
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9497
                                 " %s" % (iallocator_name, ial.info),
9498
                                 errors.ECODE_NORES)
9499

    
9500
    if len(ial.result) != ial.required_nodes:
9501
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9502
                                 " of nodes (%s), required %s" %
9503
                                 (iallocator_name,
9504
                                  len(ial.result), ial.required_nodes),
9505
                                 errors.ECODE_FAULT)
9506

    
9507
    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

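    # Summary of the roles established above (comment added for clarity, not
    # part of the original code):
    #   target_node - node whose storage gets replaced (the old secondary when
    #                 changing the secondary node)
    #   other_node  - the peer node whose storage is left untouched
    #   new_node    - the new secondary, only set for REPLACE_DISK_CHG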
    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

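    # Rough sketch of the mapping built below (added comment, not part of the
    # original code); for each replaced disk it looks something like:
    #   iv_names["disk/0"] = (drbd_dev, [old_data_lv, old_meta_lv],
    #                         [new_data_lv, new_meta_lv])
    # i.e. the DRBD device together with its old and new LV children.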
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

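      # Illustrative example of the rename dance below (added comment, not
      # from the original code): with temp_suffix == 1234567890 every old LV
      # first gets "_replaced-1234567890" appended to its name, and the newly
      # created LV is then renamed to the old LV's original name, so the DRBD
      # device keeps using the same LV names throughout.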
      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
9981
                                     "volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                    names=[self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
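      # Layout of the DRBD8 logical_id tuple unpacked below (added comment,
      # not part of the original code):
      #   (node_a, node_b, port, minor_a, minor_b, secret)
      # new_alone_id drops the port (standalone), new_net_id keeps it.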
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10102
                           "node: %s" % (idx, msg),
10103
                           hint=("Please cleanup this device manually as"
10104
                                 " soon as possible"))
10105

    
10106
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10107
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10108
                                               self.instance.disks)[pnode]
10109

    
10110
    msg = result.fail_msg
10111
    if msg:
10112
      # detaches didn't succeed (unlikely)
10113
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10114
      raise errors.OpExecError("Can't detach the disks from the network on"
10115
                               " old node: %s" % (msg,))
10116

    
10117
    # if we managed to detach at least one, we update all the disks of
10118
    # the instance to point to the new secondary
10119
    self.lu.LogInfo("Updating instance configuration")
10120
    for dev, _, new_logical_id in iv_names.itervalues():
10121
      dev.logical_id = new_logical_id
10122
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10123

    
10124
    self.cfg.Update(self.instance, feedback_fn)
10125

    
10126
    # and now perform the drbd attach
10127
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10128
                    " (standalone => connected)")
10129
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10130
                                            self.new_node],
10131
                                           self.node_secondary_ip,
10132
                                           self.instance.disks,
10133
                                           self.instance.name,
10134
                                           False)
10135
    for to_node, to_result in result.items():
10136
      msg = to_result.fail_msg
10137
      if msg:
10138
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10139
                           to_node, msg,
10140
                           hint=("please do a gnt-instance info to see the"
10141
                                 " status of disks"))
10142
    cstep = 5
10143
    if self.early_release:
10144
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10145
      cstep += 1
10146
      self._RemoveOldStorage(self.target_node, iv_names)
10147
      # WARNING: we release all node locks here, do not do other RPCs
10148
      # than WaitForSync to the primary node
10149
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10150
                    names=[self.instance.primary_node,
10151
                           self.target_node,
10152
                           self.new_node])
10153

    
10154
    # Wait for sync
10155
    # This can fail as the old devices are degraded and _WaitForSync
10156
    # does a combined result over all disks, so we don't check its return value
10157
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10158
    cstep += 1
10159
    _WaitForSync(self.lu, self.instance)
10160

    
10161
    # Check all devices manually
10162
    self._CheckDevices(self.instance.primary_node, iv_names)
10163

    
10164
    # Step: remove old storage
10165
    if not self.early_release:
10166
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10167
      self._RemoveOldStorage(self.target_node, iv_names)
10168

    
10169

    
10170
class LURepairNodeStorage(NoHooksLU):
10171
  """Repairs the volume group on a node.
10172

10173
  """
10174
  REQ_BGL = False
10175

    
10176
  def CheckArguments(self):
10177
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10178

    
10179
    storage_type = self.op.storage_type
10180

    
10181
    if (constants.SO_FIX_CONSISTENCY not in
10182
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10183
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10184
                                 " repaired" % storage_type,
10185
                                 errors.ECODE_INVAL)
10186

    
10187
  def ExpandNames(self):
10188
    self.needed_locks = {
10189
      locking.LEVEL_NODE: [self.op.node_name],
10190
      }
10191

    
10192
  def _CheckFaultyDisks(self, instance, node_name):
10193
    """Ensure faulty disks abort the opcode or at least warn."""
10194
    try:
10195
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10196
                                  node_name, True):
10197
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10198
                                   " node '%s'" % (instance.name, node_name),
10199
                                   errors.ECODE_STATE)
10200
    except errors.OpPrereqError, err:
10201
      if self.op.ignore_consistency:
10202
        self.proc.LogWarning(str(err.args[0]))
10203
      else:
10204
        raise
10205

    
10206
  def CheckPrereq(self):
10207
    """Check prerequisites.
10208

10209
    """
10210
    # Check whether any instance on this node has faulty disks
10211
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10212
      if not inst.admin_up:
10213
        continue
10214
      check_nodes = set(inst.all_nodes)
10215
      check_nodes.discard(self.op.node_name)
10216
      for inst_node_name in check_nodes:
10217
        self._CheckFaultyDisks(inst, inst_node_name)
10218

    
10219
  def Exec(self, feedback_fn):
10220
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10221
                (self.op.name, self.op.node_name))
10222

    
10223
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10224
    result = self.rpc.call_storage_execute(self.op.node_name,
10225
                                           self.op.storage_type, st_args,
10226
                                           self.op.name,
10227
                                           constants.SO_FIX_CONSISTENCY)
10228
    result.Raise("Failed to repair storage unit '%s' on %s" %
10229
                 (self.op.name, self.op.node_name))
10230

    
10231

    
10232
class LUNodeEvacuate(NoHooksLU):
10233
  """Evacuates instances off a list of nodes.
10234

10235
  """
10236
  REQ_BGL = False
10237

    
10238
  def CheckArguments(self):
10239
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10240

    
10241
  def ExpandNames(self):
10242
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10243

    
10244
    if self.op.remote_node is not None:
10245
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10246
      assert self.op.remote_node
10247

    
10248
      if self.op.remote_node == self.op.node_name:
10249
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10250
                                   " secondary node", errors.ECODE_INVAL)
10251

    
10252
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10253
        raise errors.OpPrereqError("Without the use of an iallocator only"
10254
                                   " secondary instances can be evacuated",
10255
                                   errors.ECODE_INVAL)
10256

    
10257
    # Declare locks
10258
    self.share_locks = _ShareAll()
10259
    self.needed_locks = {
10260
      locking.LEVEL_INSTANCE: [],
10261
      locking.LEVEL_NODEGROUP: [],
10262
      locking.LEVEL_NODE: [],
10263
      }
10264

    
10265
    if self.op.remote_node is None:
10266
      # Iallocator will choose any node(s) in the same group
10267
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10268
    else:
10269
      group_nodes = frozenset([self.op.remote_node])
10270

    
10271
    # Determine nodes to be locked
10272
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10273

    
10274
  def _DetermineInstances(self):
10275
    """Builds list of instances to operate on.
10276

10277
    """
10278
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10279

    
10280
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10281
      # Primary instances only
10282
      inst_fn = _GetNodePrimaryInstances
10283
      assert self.op.remote_node is None, \
10284
        "Evacuating primary instances requires iallocator"
10285
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10286
      # Secondary instances only
10287
      inst_fn = _GetNodeSecondaryInstances
10288
    else:
10289
      # All instances
10290
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10291
      inst_fn = _GetNodeInstances
10292

    
10293
    return inst_fn(self.cfg, self.op.node_name)
10294

    
10295
  def DeclareLocks(self, level):
10296
    if level == locking.LEVEL_INSTANCE:
10297
      # Lock instances optimistically, needs verification once node and group
10298
      # locks have been acquired
10299
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10300
        set(i.name for i in self._DetermineInstances())
10301

    
10302
    elif level == locking.LEVEL_NODEGROUP:
10303
      # Lock node groups optimistically, needs verification once nodes have
10304
      # been acquired
10305
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10306
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10307

    
10308
    elif level == locking.LEVEL_NODE:
10309
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10310

    
10311
  def CheckPrereq(self):
10312
    # Verify locks
10313
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10314
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10315
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10316

    
10317
    assert owned_nodes == self.lock_nodes
10318

    
10319
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10320
    if owned_groups != wanted_groups:
10321
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10322
                               " current groups are '%s', used to be '%s'" %
10323
                               (utils.CommaJoin(wanted_groups),
10324
                                utils.CommaJoin(owned_groups)))
10325

    
10326
    # Determine affected instances
10327
    self.instances = self._DetermineInstances()
10328
    self.instance_names = [i.name for i in self.instances]
10329

    
10330
    if set(self.instance_names) != owned_instances:
10331
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10332
                               " were acquired, current instances are '%s',"
10333
                               " used to be '%s'" %
10334
                               (self.op.node_name,
10335
                                utils.CommaJoin(self.instance_names),
10336
                                utils.CommaJoin(owned_instances)))
10337

    
10338
    if self.instance_names:
10339
      self.LogInfo("Evacuating instances from node '%s': %s",
10340
                   self.op.node_name,
10341
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10342
    else:
10343
      self.LogInfo("No instances to evacuate from node '%s'",
10344
                   self.op.node_name)
10345

    
10346
    if self.op.remote_node is not None:
10347
      for i in self.instances:
10348
        if i.primary_node == self.op.remote_node:
10349
          raise errors.OpPrereqError("Node %s is the primary node of"
10350
                                     " instance %s, cannot use it as"
10351
                                     " secondary" %
10352
                                     (self.op.remote_node, i.name),
10353
                                     errors.ECODE_INVAL)
10354

    
10355
  def Exec(self, feedback_fn):
10356
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10357

    
10358
    if not self.instance_names:
10359
      # No instances to evacuate
10360
      jobs = []
10361

    
10362
    elif self.op.iallocator is not None:
10363
      # TODO: Implement relocation to other group
10364
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10365
                       evac_mode=self.op.mode,
10366
                       instances=list(self.instance_names))
10367

    
10368
      ial.Run(self.op.iallocator)
10369

    
10370
      if not ial.success:
10371
        raise errors.OpPrereqError("Can't compute node evacuation using"
10372
                                   " iallocator '%s': %s" %
10373
                                   (self.op.iallocator, ial.info),
10374
                                   errors.ECODE_NORES)
10375

    
10376
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10377

    
10378
    elif self.op.remote_node is not None:
10379
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10380
      jobs = [
10381
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10382
                                        remote_node=self.op.remote_node,
10383
                                        disks=[],
10384
                                        mode=constants.REPLACE_DISK_CHG,
10385
                                        early_release=self.op.early_release)]
10386
        for instance_name in self.instance_names
10387
        ]
10388

    
10389
    else:
10390
      raise errors.ProgrammerError("No iallocator or remote node")
10391

    
10392
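    # Added note (sketch, not part of the original code): "jobs" is a list of
    # lists of opcodes, each inner list becoming one submitted job, so in the
    # remote_node case above every instance gets its own single-opcode
    # replace-disks job.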
    return ResultWithJobs(jobs)


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

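  # Illustrative shape of the unpacked result (added comment, not from the
  # original code):
  #   moved  - list of (instance_name, target_group, [target_nodes]) tuples
  #   failed - list of (instance_name, failure_reason) tuples
  #   jobs   - list of job definitions, each a list of serialized opcodes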
  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
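  # Added note (assumption about the CLI, not part of the original code):
  # this LU backs the disk-grow operation, typically reached via something
  # like "gnt-instance grow-disk <instance> <disk index> <amount>"; the
  # amount is the delta to add, not the new total size.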
  HPATH = "disk-grow"
10456
  HTYPE = constants.HTYPE_INSTANCE
10457
  REQ_BGL = False
10458

    
10459
  def ExpandNames(self):
10460
    self._ExpandAndLockInstance()
10461
    self.needed_locks[locking.LEVEL_NODE] = []
10462
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10463

    
10464
  def DeclareLocks(self, level):
10465
    if level == locking.LEVEL_NODE:
10466
      self._LockInstancesNodes()
10467

    
10468
  def BuildHooksEnv(self):
10469
    """Build hooks env.
10470

10471
    This runs on the master, the primary and all the secondaries.
10472

10473
    """
10474
    env = {
10475
      "DISK": self.op.disk,
10476
      "AMOUNT": self.op.amount,
10477
      }
10478
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10479
    return env
10480

    
10481
  def BuildHooksNodes(self):
10482
    """Build hooks nodes.
10483

10484
    """
10485
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10486
    return (nl, nl)
10487

    
10488
  def CheckPrereq(self):
10489
    """Check prerequisites.
10490

10491
    This checks that the instance is in the cluster.
10492

10493
    """
10494
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10495
    assert instance is not None, \
10496
      "Cannot retrieve locked instance %s" % self.op.instance_name
10497
    nodenames = list(instance.all_nodes)
10498
    for node in nodenames:
10499
      _CheckNodeOnline(self, node)
10500

    
10501
    self.instance = instance
10502

    
10503
    if instance.disk_template not in constants.DTS_GROWABLE:
10504
      raise errors.OpPrereqError("Instance's disk layout does not support"
10505
                                 " growing", errors.ECODE_INVAL)
10506

    
10507
    self.disk = instance.FindDisk(self.op.disk)
10508

    
10509
    if instance.disk_template not in (constants.DT_FILE,
10510
                                      constants.DT_SHARED_FILE):
10511
      # TODO: check the free disk space for file, when that feature will be
10512
      # supported
10513
      _CheckNodesFreeDiskPerVG(self, nodenames,
10514
                               self.disk.ComputeGrowth(self.op.amount))
10515

    
10516
  def Exec(self, feedback_fn):
10517
    """Execute disk grow.
10518

10519
    """
10520
    instance = self.instance
10521
    disk = self.disk
10522

    
10523
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10524
    if not disks_ok:
10525
      raise errors.OpExecError("Cannot activate block device to grow")
10526

    
10527
    # First run all grow ops in dry-run mode
10528
    for node in instance.all_nodes:
10529
      self.cfg.SetDiskID(disk, node)
10530
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10531
      result.Raise("Grow request failed to node %s" % node)
10532

    
10533
    # We know that (as far as we can test) operations across different
10534
    # nodes will succeed, time to run it for real
10535
    for node in instance.all_nodes:
10536
      self.cfg.SetDiskID(disk, node)
10537
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10538
      result.Raise("Grow request failed to node %s" % node)
10539

    
10540
      # TODO: Rewrite code to work properly
10541
      # DRBD goes into sync mode for a short amount of time after executing the
10542
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10543
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10544
      # time is a work-around.
10545
      time.sleep(5)
10546

    
10547
    disk.RecordGrow(self.op.amount)
10548
    self.cfg.Update(instance, feedback_fn)
10549
    if self.op.wait_for_sync:
10550
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10551
      if disk_abort:
10552
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10553
                             " status; please check the instance")
10554
      if not instance.admin_up:
10555
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10556
    elif not instance.admin_up:
10557
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10558
                           " not supposed to be running because no wait for"
10559
                           " sync mode was requested")
10560

    
10561

    
10562
class LUInstanceQueryData(NoHooksLU):
10563
  """Query runtime instance data.
10564

10565
  """
10566
  REQ_BGL = False
10567

    
10568
  def ExpandNames(self):
10569
    self.needed_locks = {}
10570

    
10571
    # Use locking if requested or when non-static information is wanted
10572
    if not (self.op.static or self.op.use_locking):
10573
      self.LogWarning("Non-static data requested, locks need to be acquired")
10574
      self.op.use_locking = True
10575

    
10576
    if self.op.instances or not self.op.use_locking:
10577
      # Expand instance names right here
10578
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10579
    else:
10580
      # Will use acquired locks
10581
      self.wanted_names = None
10582

    
10583
    if self.op.use_locking:
10584
      self.share_locks = _ShareAll()
10585

    
10586
      if self.wanted_names is None:
10587
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10588
      else:
10589
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10590

    
10591
      self.needed_locks[locking.LEVEL_NODE] = []
10592
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10593

    
10594
  def DeclareLocks(self, level):
10595
    if self.op.use_locking and level == locking.LEVEL_NODE:
10596
      self._LockInstancesNodes()
10597

    
10598
  def CheckPrereq(self):
10599
    """Check prerequisites.
10600

10601
    This only checks the optional instance list against the existing names.
10602

10603
    """
10604
    if self.wanted_names is None:
10605
      assert self.op.use_locking, "Locking was not used"
10606
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10607

    
10608
    self.wanted_instances = \
10609
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10610

    
10611
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10612
    """Returns the status of a block device
10613

10614
    """
10615
    if self.op.static or not node:
10616
      return None
10617

    
10618
    self.cfg.SetDiskID(dev, node)
10619

    
10620
    result = self.rpc.call_blockdev_find(node, dev)
10621
    if result.offline:
10622
      return None
10623

    
10624
    result.Raise("Can't compute disk status for %s" % instance_name)
10625

    
10626
    status = result.payload
10627
    if status is None:
10628
      return None
10629

    
10630
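    # Added note: the 7-tuple returned here is what ends up as the "pstatus"
    # and "sstatus" entries built by _ComputeDiskStatus below.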
    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"

      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instances's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

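    # Illustrative shape of the modification lists handled below (added
    # comment, not part of the original code); self.op.disks and self.op.nics
    # are lists of (op, params) pairs, e.g. roughly:
    #   [(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
    #                         constants.IDISK_MODE: constants.DISK_RDWR})]
    # where op is DDM_ADD, DDM_REMOVE or an integer index of an existing item.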
    # Disk validation
10747
    disk_addremove = 0
10748
    for disk_op, disk_dict in self.op.disks:
10749
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10750
      if disk_op == constants.DDM_REMOVE:
10751
        disk_addremove += 1
10752
        continue
10753
      elif disk_op == constants.DDM_ADD:
10754
        disk_addremove += 1
10755
      else:
10756
        if not isinstance(disk_op, int):
10757
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10758
        if not isinstance(disk_dict, dict):
10759
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10760
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10761

    
10762
      if disk_op == constants.DDM_ADD:
10763
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10764
        if mode not in constants.DISK_ACCESS_SET:
10765
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10766
                                     errors.ECODE_INVAL)
10767
        size = disk_dict.get(constants.IDISK_SIZE, None)
10768
        if size is None:
10769
          raise errors.OpPrereqError("Required disk parameter size missing",
10770
                                     errors.ECODE_INVAL)
10771
        try:
10772
          size = int(size)
10773
        except (TypeError, ValueError), err:
10774
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10775
                                     str(err), errors.ECODE_INVAL)
10776
        disk_dict[constants.IDISK_SIZE] = size
10777
      else:
10778
        # modification of disk
10779
        if constants.IDISK_SIZE in disk_dict:
10780
          raise errors.OpPrereqError("Disk size change not possible, use"
10781
                                     " grow-disk", errors.ECODE_INVAL)
10782

    
10783
    if disk_addremove > 1:
10784
      raise errors.OpPrereqError("Only one disk add or remove operation"
10785
                                 " supported at a time", errors.ECODE_INVAL)
10786

    
10787
    if self.op.disks and self.op.disk_template is not None:
10788
      raise errors.OpPrereqError("Disk template conversion and other disk"
10789
                                 " changes not supported at the same time",
10790
                                 errors.ECODE_INVAL)
10791

    
10792
    if (self.op.disk_template and
10793
        self.op.disk_template in constants.DTS_INT_MIRROR and
10794
        self.op.remote_node is None):
10795
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10796
                                 " one requires specifying a secondary node",
10797
                                 errors.ECODE_INVAL)
10798

    
10799
    # NIC validation
10800
    nic_addremove = 0
10801
    for nic_op, nic_dict in self.op.nics:
10802
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10803
      if nic_op == constants.DDM_REMOVE:
10804
        nic_addremove += 1
10805
        continue
10806
      elif nic_op == constants.DDM_ADD:
10807
        nic_addremove += 1
10808
      else:
10809
        if not isinstance(nic_op, int):
10810
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10811
        if not isinstance(nic_dict, dict):
10812
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10813
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10814

    
10815
      # nic_dict should be a dict
10816
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10817
      if nic_ip is not None:
10818
        if nic_ip.lower() == constants.VALUE_NONE:
10819
          nic_dict[constants.INIC_IP] = None
10820
        else:
10821
          if not netutils.IPAddress.IsValid(nic_ip):
10822
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10823
                                       errors.ECODE_INVAL)
10824

    
10825
      nic_bridge = nic_dict.get("bridge", None)
10826
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10827
      if nic_bridge and nic_link:
10828
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10829
                                   " at the same time", errors.ECODE_INVAL)
10830
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10831
        nic_dict["bridge"] = None
10832
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10833
        nic_dict[constants.INIC_LINK] = None
10834

    
10835
      if nic_op == constants.DDM_ADD:
10836
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10837
        if nic_mac is None:
10838
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10839

    
10840
      if constants.INIC_MAC in nic_dict:
10841
        nic_mac = nic_dict[constants.INIC_MAC]
10842
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10843
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10844

    
10845
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10846
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10847
                                     " modifying an existing nic",
10848
                                     errors.ECODE_INVAL)
10849

    
10850
    if nic_addremove > 1:
10851
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10852
                                 " supported at a time", errors.ECODE_INVAL)
10853

    
10854
  def ExpandNames(self):
10855
    self._ExpandAndLockInstance()
10856
    self.needed_locks[locking.LEVEL_NODE] = []
10857
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
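    # The node lock list is filled in later by DeclareLocks, which calls
    # _LockInstancesNodes() once the instance lock is held.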
10858

    
10859
  def DeclareLocks(self, level):
10860
    if level == locking.LEVEL_NODE:
10861
      self._LockInstancesNodes()
10862
      if self.op.disk_template and self.op.remote_node:
10863
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10864
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10865

    
10866
  def BuildHooksEnv(self):
10867
    """Build hooks env.
10868

10869
    This runs on the master, primary and secondaries.
10870

10871
    """
10872
    args = dict()
10873
    if constants.BE_MEMORY in self.be_new:
10874
      args["memory"] = self.be_new[constants.BE_MEMORY]
10875
    if constants.BE_VCPUS in self.be_new:
10876
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10877
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10878
    # information at all.
10879
    if self.op.nics:
10880
      args["nics"] = []
10881
      nic_override = dict(self.op.nics)
10882
      for idx, nic in enumerate(self.instance.nics):
10883
        if idx in nic_override:
10884
          this_nic_override = nic_override[idx]
10885
        else:
10886
          this_nic_override = {}
10887
        if constants.INIC_IP in this_nic_override:
10888
          ip = this_nic_override[constants.INIC_IP]
10889
        else:
10890
          ip = nic.ip
10891
        if constants.INIC_MAC in this_nic_override:
10892
          mac = this_nic_override[constants.INIC_MAC]
10893
        else:
10894
          mac = nic.mac
10895
        if idx in self.nic_pnew:
10896
          nicparams = self.nic_pnew[idx]
10897
        else:
10898
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10899
        mode = nicparams[constants.NIC_MODE]
10900
        link = nicparams[constants.NIC_LINK]
10901
        args["nics"].append((ip, mac, mode, link))
10902
      if constants.DDM_ADD in nic_override:
10903
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10904
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10905
        nicparams = self.nic_pnew[constants.DDM_ADD]
10906
        mode = nicparams[constants.NIC_MODE]
10907
        link = nicparams[constants.NIC_LINK]
10908
        args["nics"].append((ip, mac, mode, link))
10909
      elif constants.DDM_REMOVE in nic_override:
10910
        del args["nics"][-1]
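        # NIC removal always targets the last NIC, so dropping the final
        # entry keeps the hook data in sync with the instance's new NIC list.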
10911

    
10912
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10913
    if self.op.disk_template:
10914
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10915

    
10916
    return env
10917

    
10918
  def BuildHooksNodes(self):
10919
    """Build hooks nodes.
10920

10921
    """
10922
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10923
    return (nl, nl)
10924

    
10925
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested changes against the instance's current
    configuration and the state of its primary and secondary nodes.

    """
    # checking the new params on the primary/secondary nodes
10932

    
10933
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10934
    cluster = self.cluster = self.cfg.GetClusterInfo()
10935
    assert self.instance is not None, \
10936
      "Cannot retrieve locked instance %s" % self.op.instance_name
10937
    pnode = instance.primary_node
10938
    nodelist = list(instance.all_nodes)
10939

    
10940
    # OS change
10941
    if self.op.os_name and not self.op.force:
10942
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10943
                      self.op.force_variant)
10944
      instance_os = self.op.os_name
10945
    else:
10946
      instance_os = instance.os
10947

    
10948
    if self.op.disk_template:
10949
      if instance.disk_template == self.op.disk_template:
10950
        raise errors.OpPrereqError("Instance already has disk template %s" %
10951
                                   instance.disk_template, errors.ECODE_INVAL)
10952

    
10953
      if (instance.disk_template,
10954
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10955
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10956
                                   " %s to %s" % (instance.disk_template,
10957
                                                  self.op.disk_template),
10958
                                   errors.ECODE_INVAL)
10959
      _CheckInstanceDown(self, instance, "cannot change disk template")
10960
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10961
        if self.op.remote_node == pnode:
10962
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10963
                                     " as the primary node of the instance" %
10964
                                     self.op.remote_node, errors.ECODE_STATE)
10965
        _CheckNodeOnline(self, self.op.remote_node)
10966
        _CheckNodeNotDrained(self, self.op.remote_node)
10967
        # FIXME: here we assume that the old instance type is DT_PLAIN
10968
        assert instance.disk_template == constants.DT_PLAIN
10969
        disks = [{constants.IDISK_SIZE: d.size,
10970
                  constants.IDISK_VG: d.logical_id[0]}
10971
                 for d in instance.disks]
10972
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10973
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10974

    
10975
    # hvparams processing
10976
    if self.op.hvparams:
10977
      hv_type = instance.hypervisor
10978
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10979
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10980
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10981

    
10982
      # local check
10983
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10984
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10985
      self.hv_proposed = self.hv_new = hv_new # the new actual values
10986
      self.hv_inst = i_hvdict # the new dict (without defaults)
10987
    else:
10988
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
10989
                                              instance.hvparams)
10990
      self.hv_new = self.hv_inst = {}
10991

    
10992
    # beparams processing
10993
    if self.op.beparams:
10994
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10995
                                   use_none=True)
10996
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10997
      be_new = cluster.SimpleFillBE(i_bedict)
10998
      self.be_proposed = self.be_new = be_new # the new actual values
10999
      self.be_inst = i_bedict # the new dict (without defaults)
11000
    else:
11001
      self.be_new = self.be_inst = {}
11002
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11003
    be_old = cluster.FillBE(instance)
11004

    
11005
    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
11008
    if (constants.BE_VCPUS in self.be_proposed and
11009
        constants.HV_CPU_MASK in self.hv_proposed):
11010
      cpu_list = \
11011
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11012
      # Verify mask is consistent with number of vCPUs. Can skip this
11013
      # test if only 1 entry in the CPU mask, which means same mask
11014
      # is applied to all vCPUs.
11015
      if (len(cpu_list) > 1 and
11016
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11017
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11018
                                   " CPU mask [%s]" %
11019
                                   (self.be_proposed[constants.BE_VCPUS],
11020
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11021
                                   errors.ECODE_INVAL)
11022

    
11023
      # Only perform this test if a new CPU mask is given
11024
      if constants.HV_CPU_MASK in self.hv_new:
11025
        # Calculate the largest CPU number requested
11026
        max_requested_cpu = max(map(max, cpu_list))
11027
        # Check that all of the instance's nodes have enough physical CPUs to
11028
        # satisfy the requested CPU mask
11029
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11030
                                max_requested_cpu + 1, instance.hypervisor)
11031

    
11032
    # osparams processing
11033
    if self.op.osparams:
11034
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11035
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11036
      self.os_inst = i_osdict # the new dict (without defaults)
11037
    else:
11038
      self.os_inst = {}
11039

    
11040
    self.warn = []
11041

    
11042
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11043
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11044
      mem_check_list = [pnode]
11045
      if be_new[constants.BE_AUTO_BALANCE]:
11046
        # either we changed auto_balance to yes or it was from before
11047
        mem_check_list.extend(instance.secondary_nodes)
11048
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11049
                                                  instance.hypervisor)
11050
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11051
                                         instance.hypervisor)
11052
      pninfo = nodeinfo[pnode]
11053
      msg = pninfo.fail_msg
11054
      if msg:
11055
        # Assume the primary node is unreachable and go ahead
11056
        self.warn.append("Can't get info from primary node %s: %s" %
11057
                         (pnode, msg))
11058
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11059
        self.warn.append("Node data from primary node %s doesn't contain"
11060
                         " free memory information" % pnode)
11061
      elif instance_info.fail_msg:
11062
        self.warn.append("Can't get instance runtime information: %s" %
11063
                        instance_info.fail_msg)
11064
      else:
11065
        if instance_info.payload:
11066
          current_mem = int(instance_info.payload["memory"])
11067
        else:
11068
          # Assume instance not running
11069
          # (there is a slight race condition here, but it's not very probable,
11070
          # and we have no other way to check)
11071
          current_mem = 0
11072
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11073
                    pninfo.payload["memory_free"])
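        # A positive result means the primary node is short that many MB of
        # free memory for the instance to start with the new memory size.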
11074
        if miss_mem > 0:
11075
          raise errors.OpPrereqError("This change will prevent the instance"
11076
                                     " from starting, due to %d MB of memory"
11077
                                     " missing on its primary node" % miss_mem,
11078
                                     errors.ECODE_NORES)
11079

    
11080
      if be_new[constants.BE_AUTO_BALANCE]:
11081
        for node, nres in nodeinfo.items():
11082
          if node not in instance.secondary_nodes:
11083
            continue
11084
          nres.Raise("Can't get info from secondary node %s" % node,
11085
                     prereq=True, ecode=errors.ECODE_STATE)
11086
          if not isinstance(nres.payload.get("memory_free", None), int):
11087
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11088
                                       " memory information" % node,
11089
                                       errors.ECODE_STATE)
11090
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11091
            raise errors.OpPrereqError("This change will prevent the instance"
11092
                                       " from failover to its secondary node"
11093
                                       " %s, due to not enough memory" % node,
11094
                                       errors.ECODE_STATE)
11095

    
11096
    # NIC processing
11097
    self.nic_pnew = {}
11098
    self.nic_pinst = {}
11099
    for nic_op, nic_dict in self.op.nics:
11100
      if nic_op == constants.DDM_REMOVE:
11101
        if not instance.nics:
11102
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11103
                                     errors.ECODE_INVAL)
11104
        continue
11105
      if nic_op != constants.DDM_ADD:
11106
        # an existing nic
11107
        if not instance.nics:
11108
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11109
                                     " no NICs" % nic_op,
11110
                                     errors.ECODE_INVAL)
11111
        if nic_op < 0 or nic_op >= len(instance.nics):
11112
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11113
                                     " are 0 to %d" %
11114
                                     (nic_op, len(instance.nics) - 1),
11115
                                     errors.ECODE_INVAL)
11116
        old_nic_params = instance.nics[nic_op].nicparams
11117
        old_nic_ip = instance.nics[nic_op].ip
11118
      else:
11119
        old_nic_params = {}
11120
        old_nic_ip = None
11121

    
11122
      update_params_dict = dict([(key, nic_dict[key])
11123
                                 for key in constants.NICS_PARAMETERS
11124
                                 if key in nic_dict])
11125

    
11126
      if "bridge" in nic_dict:
11127
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11128

    
11129
      new_nic_params = _GetUpdatedParams(old_nic_params,
11130
                                         update_params_dict)
11131
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11132
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11133
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11134
      self.nic_pinst[nic_op] = new_nic_params
11135
      self.nic_pnew[nic_op] = new_filled_nic_params
11136
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11137

    
11138
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11139
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11140
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11141
        if msg:
11142
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11143
          if self.op.force:
11144
            self.warn.append(msg)
11145
          else:
11146
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11147
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11148
        if constants.INIC_IP in nic_dict:
11149
          nic_ip = nic_dict[constants.INIC_IP]
11150
        else:
11151
          nic_ip = old_nic_ip
11152
        if nic_ip is None:
11153
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11154
                                     " on a routed nic", errors.ECODE_INVAL)
11155
      if constants.INIC_MAC in nic_dict:
11156
        nic_mac = nic_dict[constants.INIC_MAC]
11157
        if nic_mac is None:
11158
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11159
                                     errors.ECODE_INVAL)
11160
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11161
          # otherwise generate the mac
11162
          nic_dict[constants.INIC_MAC] = \
11163
            self.cfg.GenerateMAC(self.proc.GetECId())
11164
        else:
11165
          # or validate/reserve the current one
11166
          try:
11167
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11168
          except errors.ReservationError:
11169
            raise errors.OpPrereqError("MAC address %s already in use"
11170
                                       " in cluster" % nic_mac,
11171
                                       errors.ECODE_NOTUNIQUE)
11172

    
11173
    # DISK processing
11174
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11175
      raise errors.OpPrereqError("Disk operations not supported for"
11176
                                 " diskless instances",
11177
                                 errors.ECODE_INVAL)
11178
    for disk_op, _ in self.op.disks:
11179
      if disk_op == constants.DDM_REMOVE:
11180
        if len(instance.disks) == 1:
11181
          raise errors.OpPrereqError("Cannot remove the last disk of"
11182
                                     " an instance", errors.ECODE_INVAL)
11183
        _CheckInstanceDown(self, instance, "cannot remove disks")
11184

    
11185
      if (disk_op == constants.DDM_ADD and
11186
          len(instance.disks) >= constants.MAX_DISKS):
11187
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11188
                                   " add more" % constants.MAX_DISKS,
11189
                                   errors.ECODE_STATE)
11190
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11191
        # an existing disk
11192
        if disk_op < 0 or disk_op >= len(instance.disks):
11193
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11194
                                     " are 0 to %d" %
11195
                                     (disk_op, len(instance.disks)),
11196
                                     errors.ECODE_INVAL)
11197

    
11198
    return
11199

    
11200
  def _ConvertPlainToDrbd(self, feedback_fn):
11201
    """Converts an instance from plain to drbd.
11202

11203
    """
11204
    feedback_fn("Converting template to drbd")
11205
    instance = self.instance
11206
    pnode = instance.primary_node
11207
    snode = self.op.remote_node
11208

    
11209
    # create a fake disk info for _GenerateDiskTemplate
11210
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11211
                  constants.IDISK_VG: d.logical_id[0]}
11212
                 for d in instance.disks]
11213
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11214
                                      instance.name, pnode, [snode],
11215
                                      disk_info, None, None, 0, feedback_fn)
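    # The None, None, 0 arguments correspond to the file-storage path, file
    # driver and base disk index (compare the DDM_ADD call in Exec): file
    # storage does not apply here and the disks are regenerated from index 0.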
11216
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
11219
    for disk in new_disks:
11220
      # unfortunately this is... not too nice
11221
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11222
                            info, True)
11223
      for child in disk.children:
11224
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11225
    # at this stage, all new LVs have been created, we can rename the
11226
    # old ones
11227
    feedback_fn("Renaming original volumes...")
11228
    rename_list = [(o, n.children[0].logical_id)
11229
                   for (o, n) in zip(instance.disks, new_disks)]
11230
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11231
    result.Raise("Failed to rename original LVs")
11232

    
11233
    feedback_fn("Initializing DRBD devices...")
11234
    # all child devices are in place, we can now create the DRBD devices
11235
    for disk in new_disks:
11236
      for node in [pnode, snode]:
11237
        f_create = node == pnode
11238
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11239

    
11240
    # at this point, the instance has been modified
11241
    instance.disk_template = constants.DT_DRBD8
11242
    instance.disks = new_disks
11243
    self.cfg.Update(instance, feedback_fn)
11244

    
11245
    # disks are created, waiting for sync
11246
    disk_abort = not _WaitForSync(self, instance,
11247
                                  oneshot=not self.op.wait_for_sync)
11248
    if disk_abort:
11249
      raise errors.OpExecError("There are some degraded disks for"
11250
                               " this instance, please cleanup manually")
11251

    
11252
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance
    assert len(instance.secondary_nodes) == 1
    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)
11291

    
11292
  def Exec(self, feedback_fn):
11293
    """Modifies an instance.
11294

11295
    All parameters take effect only at the next restart of the instance.
11296

11297
    """
11298
    # Process here the warnings from CheckPrereq, as we don't have a
11299
    # feedback_fn there.
11300
    for warn in self.warn:
11301
      feedback_fn("WARNING: %s" % warn)
11302

    
11303
    result = []
11304
    instance = self.instance
11305
    # disk changes
11306
    for disk_op, disk_dict in self.op.disks:
11307
      if disk_op == constants.DDM_REMOVE:
11308
        # remove the last disk
11309
        device = instance.disks.pop()
11310
        device_idx = len(instance.disks)
11311
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11312
          self.cfg.SetDiskID(disk, node)
11313
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11314
          if msg:
11315
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11316
                            " continuing anyway", device_idx, node, msg)
11317
        result.append(("disk/%d" % device_idx, "remove"))
11318
      elif disk_op == constants.DDM_ADD:
11319
        # add a new disk
11320
        if instance.disk_template in (constants.DT_FILE,
11321
                                        constants.DT_SHARED_FILE):
11322
          file_driver, file_path = instance.disks[0].logical_id
11323
          file_path = os.path.dirname(file_path)
11324
        else:
11325
          file_driver = file_path = None
11326
        disk_idx_base = len(instance.disks)
11327
        new_disk = _GenerateDiskTemplate(self,
11328
                                         instance.disk_template,
11329
                                         instance.name, instance.primary_node,
11330
                                         instance.secondary_nodes,
11331
                                         [disk_dict],
11332
                                         file_path,
11333
                                         file_driver,
11334
                                         disk_idx_base, feedback_fn)[0]
11335
        instance.disks.append(new_disk)
11336
        info = _GetInstanceInfoText(instance)
11337

    
11338
        logging.info("Creating volume %s for instance %s",
11339
                     new_disk.iv_name, instance.name)
11340
        # Note: this needs to be kept in sync with _CreateDisks
11341
        #HARDCODE
11342
        for node in instance.all_nodes:
11343
          f_create = node == instance.primary_node
11344
          try:
11345
            _CreateBlockDev(self, node, instance, new_disk,
11346
                            f_create, info, f_create)
11347
          except errors.OpExecError, err:
11348
            self.LogWarning("Failed to create volume %s (%s) on"
11349
                            " node %s: %s",
11350
                            new_disk.iv_name, new_disk, node, err)
11351
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11352
                       (new_disk.size, new_disk.mode)))
11353
      else:
11354
        # change a given disk
11355
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11356
        result.append(("disk.mode/%d" % disk_op,
11357
                       disk_dict[constants.IDISK_MODE]))
11358

    
11359
    if self.op.disk_template:
11360
      r_shut = _ShutdownInstanceDisks(self, instance)
11361
      if not r_shut:
11362
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11363
                                 " proceed with disk template conversion")
11364
      mode = (instance.disk_template, self.op.disk_template)
11365
      try:
11366
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11367
      except:
11368
        self.cfg.ReleaseDRBDMinors(instance.name)
11369
        raise
11370
      result.append(("disk_template", self.op.disk_template))
11371

    
11372
    # NIC changes
11373
    for nic_op, nic_dict in self.op.nics:
11374
      if nic_op == constants.DDM_REMOVE:
11375
        # remove the last nic
11376
        del instance.nics[-1]
11377
        result.append(("nic.%d" % len(instance.nics), "remove"))
11378
      elif nic_op == constants.DDM_ADD:
11379
        # mac and bridge should be set, by now
11380
        mac = nic_dict[constants.INIC_MAC]
11381
        ip = nic_dict.get(constants.INIC_IP, None)
11382
        nicparams = self.nic_pinst[constants.DDM_ADD]
11383
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11384
        instance.nics.append(new_nic)
11385
        result.append(("nic.%d" % (len(instance.nics) - 1),
11386
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11387
                       (new_nic.mac, new_nic.ip,
11388
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11389
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11390
                       )))
11391
      else:
11392
        for key in (constants.INIC_MAC, constants.INIC_IP):
11393
          if key in nic_dict:
11394
            setattr(instance.nics[nic_op], key, nic_dict[key])
11395
        if nic_op in self.nic_pinst:
11396
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11397
        for key, val in nic_dict.iteritems():
11398
          result.append(("nic.%s/%d" % (key, nic_op), val))
11399

    
11400
    # hvparams changes
11401
    if self.op.hvparams:
11402
      instance.hvparams = self.hv_inst
11403
      for key, val in self.op.hvparams.iteritems():
11404
        result.append(("hv/%s" % key, val))
11405

    
11406
    # beparams changes
11407
    if self.op.beparams:
11408
      instance.beparams = self.be_inst
11409
      for key, val in self.op.beparams.iteritems():
11410
        result.append(("be/%s" % key, val))
11411

    
11412
    # OS change
11413
    if self.op.os_name:
11414
      instance.os = self.op.os_name
11415

    
11416
    # osparams changes
11417
    if self.op.osparams:
11418
      instance.osparams = self.os_inst
11419
      for key, val in self.op.osparams.iteritems():
11420
        result.append(("os/%s" % key, val))
11421

    
11422
    self.cfg.Update(instance, feedback_fn)
11423

    
11424
    return result
11425

    
11426
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
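  # The mapping above is used as a dispatch table by Exec(): the pair
  # (current template, requested template) selects the conversion helper,
  # which runs only after the instance's disks have been shut down.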
11430

    
11431

    
11432
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False
11436

    
11437
  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11453

    
11454
  def DeclareLocks(self, level):
11455
    if level == locking.LEVEL_NODEGROUP:
11456
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11457

    
11458
      if self.req_target_uuids:
11459
        lock_groups = set(self.req_target_uuids)
11460

    
11461
        # Lock all groups used by instance optimistically; this requires going
11462
        # via the node before it's locked, requiring verification later on
11463
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11464
        lock_groups.update(instance_groups)
11465
      else:
11466
        # No target groups, need to lock all of them
11467
        lock_groups = locking.ALL_SET
11468

    
11469
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11470

    
11471
    elif level == locking.LEVEL_NODE:
11472
      if self.req_target_uuids:
11473
        # Lock all nodes used by instances
11474
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11475
        self._LockInstancesNodes()
11476

    
11477
        # Lock all nodes in all potential target groups
11478
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11479
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11480
        member_nodes = [node_name
11481
                        for group in lock_groups
11482
                        for node_name in self.cfg.GetNodeGroup(group).members]
11483
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11484
      else:
11485
        # Lock all nodes as all groups are potential targets
11486
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11487

    
11488
  def CheckPrereq(self):
11489
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11490
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11491
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11492

    
11493
    assert (self.req_target_uuids is None or
11494
            owned_groups.issuperset(self.req_target_uuids))
11495
    assert owned_instances == set([self.op.instance_name])
11496

    
11497
    # Get instance information
11498
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11499

    
11500
    # Check if node groups for locked instance are still correct
11501
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11502
      ("Instance %s's nodes changed while we kept the lock" %
11503
       self.op.instance_name)
11504

    
11505
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11506
                                           owned_groups)
11507

    
11508
    if self.req_target_uuids:
11509
      # User requested specific target groups
11510
      self.target_uuids = self.req_target_uuids
11511
    else:
11512
      # All groups except those used by the instance are potential targets
11513
      self.target_uuids = owned_groups - inst_groups
11514

    
11515
    conflicting_groups = self.target_uuids & inst_groups
11516
    if conflicting_groups:
11517
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11518
                                 " used by the instance '%s'" %
11519
                                 (utils.CommaJoin(conflicting_groups),
11520
                                  self.op.instance_name),
11521
                                 errors.ECODE_INVAL)
11522

    
11523
    if not self.target_uuids:
11524
      raise errors.OpPrereqError("There are no possible target groups",
11525
                                 errors.ECODE_INVAL)
11526

    
11527
  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])
11547

    
11548
  def Exec(self, feedback_fn):
11549
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11550

    
11551
    assert instances == [self.op.instance_name], "Instance not locked"
11552

    
11553
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11554
                     instances=instances, target_groups=list(self.target_uuids))
11555

    
11556
    ial.Run(self.op.iallocator)
11557

    
11558
    if not ial.success:
11559
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11560
                                 " instance '%s' using iallocator '%s': %s" %
11561
                                 (self.op.instance_name, self.op.iallocator,
11562
                                  ial.info),
11563
                                 errors.ECODE_NORES)
11564

    
11565
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
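    # _LoadNodeEvacResult translates the iallocator response into the job
    # definitions returned (and later submitted) below.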
11566

    
11567
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11568
                 " instance '%s'", len(jobs), self.op.instance_name)
11569

    
11570
    return ResultWithJobs(jobs)
11571

    
11572

    
11573
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result
11607

    
11608

    
11609
class LUBackupPrepare(NoHooksLU):
11610
  """Prepares an instance for an export and returns useful information.
11611

11612
  """
11613
  REQ_BGL = False
11614

    
11615
  def ExpandNames(self):
11616
    self._ExpandAndLockInstance()
11617

    
11618
  def CheckPrereq(self):
11619
    """Check prerequisites.
11620

11621
    """
11622
    instance_name = self.op.instance_name
11623

    
11624
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11625
    assert self.instance is not None, \
11626
          "Cannot retrieve locked instance %s" % self.op.instance_name
11627
    _CheckNodeOnline(self, self.instance.primary_node)
11628

    
11629
    self._cds = _GetClusterDomainSecret()
11630

    
11631
  def Exec(self, feedback_fn):
11632
    """Prepares an instance for an export.
11633

11634
    """
11635
    instance = self.instance
11636

    
11637
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11638
      salt = utils.GenerateSecret(8)
11639

    
11640
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11641
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11642
                                              constants.RIE_CERT_VALIDITY)
11643
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11644

    
11645
      (name, cert_pem) = result.payload
11646

    
11647
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11648
                                             cert_pem)
11649

    
11650
      return {
11651
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11652
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11653
                          salt),
11654
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11655
        }
11656

    
11657
    return None
11658

    
11659

    
11660
class LUBackupExport(LogicalUnit):
11661
  """Export an instance to an image in the cluster.
11662

11663
  """
11664
  HPATH = "instance-export"
11665
  HTYPE = constants.HTYPE_INSTANCE
11666
  REQ_BGL = False
11667

    
11668
  def CheckArguments(self):
11669
    """Check the arguments.
11670

11671
    """
11672
    self.x509_key_name = self.op.x509_key_name
11673
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11674

    
11675
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11676
      if not self.x509_key_name:
11677
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11678
                                   errors.ECODE_INVAL)
11679

    
11680
      if not self.dest_x509_ca_pem:
11681
        raise errors.OpPrereqError("Missing destination X509 CA",
11682
                                   errors.ECODE_INVAL)
11683

    
11684
  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11698

    
11699
  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.
11702

    
11703
  def BuildHooksEnv(self):
11704
    """Build hooks env.
11705

11706
    This will run on the master, primary node and target node.
11707

11708
    """
11709
    env = {
11710
      "EXPORT_MODE": self.op.mode,
11711
      "EXPORT_NODE": self.op.target_node,
11712
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11713
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11714
      # TODO: Generic function for boolean env variables
11715
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11716
      }
11717

    
11718
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11719

    
11720
    return env
11721

    
11722
  def BuildHooksNodes(self):
11723
    """Build hooks nodes.
11724

11725
    """
11726
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11727

    
11728
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11729
      nl.append(self.op.target_node)
11730

    
11731
    return (nl, nl)
11732

    
11733
  def CheckPrereq(self):
11734
    """Check prerequisites.
11735

11736
    This checks that the instance and node names are valid.
11737

11738
    """
11739
    instance_name = self.op.instance_name
11740

    
11741
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11742
    assert self.instance is not None, \
11743
          "Cannot retrieve locked instance %s" % self.op.instance_name
11744
    _CheckNodeOnline(self, self.instance.primary_node)
11745

    
11746
    if (self.op.remove_instance and self.instance.admin_up and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first")
11750

    
11751
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11752
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11753
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11754
      assert self.dst_node is not None
11755

    
11756
      _CheckNodeOnline(self, self.dst_node.name)
11757
      _CheckNodeNotDrained(self, self.dst_node.name)
11758

    
11759
      self._cds = None
11760
      self.dest_disk_info = None
11761
      self.dest_x509_ca = None
11762

    
11763
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11764
      self.dst_node = None
11765

    
11766
      if len(self.op.target_node) != len(self.instance.disks):
11767
        raise errors.OpPrereqError(("Received destination information for %s"
11768
                                    " disks, but instance %s has %s disks") %
11769
                                   (len(self.op.target_node), instance_name,
11770
                                    len(self.instance.disks)),
11771
                                   errors.ECODE_INVAL)
11772

    
11773
      cds = _GetClusterDomainSecret()
11774

    
11775
      # Check X509 key name
11776
      try:
11777
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11778
      except (TypeError, ValueError), err:
11779
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11780

    
11781
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11782
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11783
                                   errors.ECODE_INVAL)
11784

    
11785
      # Load and verify CA
11786
      try:
11787
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11788
      except OpenSSL.crypto.Error, err:
11789
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11790
                                   (err, ), errors.ECODE_INVAL)
11791

    
11792
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11793
      if errcode is not None:
11794
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11795
                                   (msg, ), errors.ECODE_INVAL)
11796

    
11797
      self.dest_x509_ca = cert
11798

    
11799
      # Verify target information
11800
      disk_info = []
11801
      for idx, disk_data in enumerate(self.op.target_node):
11802
        try:
11803
          (host, port, magic) = \
11804
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11805
        except errors.GenericError, err:
11806
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11807
                                     (idx, err), errors.ECODE_INVAL)
11808

    
11809
        disk_info.append((host, port, magic))
11810

    
11811
      assert len(disk_info) == len(self.op.target_node)
11812
      self.dest_disk_info = disk_info
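      # In remote mode self.op.target_node does not name a node; it holds one
      # verified (host, port, magic) tuple per instance disk.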
11813

    
11814
    else:
11815
      raise errors.ProgrammerError("Unhandled export mode %r" %
11816
                                   self.op.mode)
11817

    
11818
    # instance disk type verification
11819
    # TODO: Implement export support for file-based disks
11820
    for disk in self.instance.disks:
11821
      if disk.dev_type == constants.LD_FILE:
11822
        raise errors.OpPrereqError("Export not supported for instances with"
11823
                                   " file-based disks", errors.ECODE_INVAL)
11824

    
11825
  def _CleanupExports(self, feedback_fn):
11826
    """Removes exports of current instance from all other nodes.
11827

11828
    If an instance in a cluster with nodes A..D was exported to node C, its
11829
    exports will be removed from the nodes A, B and D.
11830

11831
    """
11832
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11833

    
11834
    nodelist = self.cfg.GetNodeList()
11835
    nodelist.remove(self.dst_node.name)
11836

    
11837
    # on one-node clusters nodelist will be empty after the removal
11838
    # if we proceed the backup would be removed because OpBackupQuery
11839
    # substitutes an empty list with the full cluster node list.
11840
    iname = self.instance.name
11841
    if nodelist:
11842
      feedback_fn("Removing old exports for instance %s" % iname)
11843
      exportlist = self.rpc.call_export_list(nodelist)
11844
      for node in exportlist:
11845
        if exportlist[node].fail_msg:
11846
          continue
11847
        if iname in exportlist[node].payload:
11848
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11849
          if msg:
11850
            self.LogWarning("Could not remove older export for instance %s"
11851
                            " on node %s: %s", iname, node, msg)
11852

    
11853
  def Exec(self, feedback_fn):
11854
    """Export an instance to an image in the cluster.
11855

11856
    """
11857
    assert self.op.mode in constants.EXPORT_MODES
11858

    
11859
    instance = self.instance
11860
    src_node = instance.primary_node
11861

    
11862
    if self.op.shutdown:
11863
      # shutdown the instance, but not the disks
11864
      feedback_fn("Shutting down instance %s" % instance.name)
11865
      result = self.rpc.call_instance_shutdown(src_node, instance,
11866
                                               self.op.shutdown_timeout)
11867
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11868
      result.Raise("Could not shutdown instance %s on"
11869
                   " node %s" % (instance.name, src_node))
11870

    
11871
    # set the disks ID correctly since call_instance_start needs the
11872
    # correct drbd minor to create the symlinks
11873
    for disk in instance.disks:
11874
      self.cfg.SetDiskID(disk, src_node)
11875

    
11876
    activate_disks = (not instance.admin_up)
11877

    
11878
    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)
11882

    
11883
    try:
11884
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11885
                                                     instance)
11886

    
11887
      helper.CreateSnapshots()
11888
      try:
11889
        if (self.op.shutdown and instance.admin_up and
11890
            not self.op.remove_instance):
11891
          assert not activate_disks
11892
          feedback_fn("Starting instance %s" % instance.name)
11893
          result = self.rpc.call_instance_start(src_node, instance,
11894
                                                None, None, False)
11895
          msg = result.fail_msg
11896
          if msg:
11897
            feedback_fn("Failed to start instance: %s" % msg)
11898
            _ShutdownInstanceDisks(self, instance)
11899
            raise errors.OpExecError("Could not start instance: %s" % msg)
11900

    
11901
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11902
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11903
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11904
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11905
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11906

    
11907
          (key_name, _, _) = self.x509_key_name
11908

    
11909
          dest_ca_pem = \
11910
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11911
                                            self.dest_x509_ca)
11912

    
11913
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11914
                                                     key_name, dest_ca_pem,
11915
                                                     timeouts)
11916
      finally:
11917
        helper.Cleanup()
11918

    
11919
      # Check for backwards compatibility
11920
      assert len(dresults) == len(instance.disks)
11921
      assert compat.all(isinstance(i, bool) for i in dresults), \
11922
             "Not all results are boolean: %r" % dresults
11923

    
11924
    finally:
11925
      if activate_disks:
11926
        feedback_fn("Deactivating disks for %s" % instance.name)
11927
        _ShutdownInstanceDisks(self, instance)
11928

    
11929
    if not (compat.all(dresults) and fin_resu):
11930
      failures = []
11931
      if not fin_resu:
11932
        failures.append("export finalization")
11933
      if not compat.all(dresults):
11934
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11935
                               if not dsk)
11936
        failures.append("disk export: disk(s) %s" % fdsk)
11937

    
11938
      raise errors.OpExecError("Export failed, errors in %s" %
11939
                               utils.CommaJoin(failures))
11940

    
11941
    # At this point, the export was successful, we can cleanup/finish
11942

    
11943
    # Remove instance if requested
11944
    if self.op.remove_instance:
11945
      feedback_fn("Removing instance %s" % instance.name)
11946
      _RemoveInstance(self, feedback_fn, instance,
11947
                      self.op.ignore_remove_failures)
11948

    
11949
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11950
      self._CleanupExports(feedback_fn)
11951

    
11952
    return fin_resu, dresults
11953

    
11954

    
11955
class LUBackupRemove(NoHooksLU):
11956
  """Remove exports related to the named instance.
11957

11958
  """
11959
  REQ_BGL = False
11960

    
11961
  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11967

    
11968
  def Exec(self, feedback_fn):
11969
    """Remove any export.
11970

11971
    """
11972
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11973
    # If the instance was not found we'll try with the name that was passed in.
11974
    # This will only work if it was an FQDN, though.
11975
    fqdn_warn = False
11976
    if not instance_name:
11977
      fqdn_warn = True
11978
      instance_name = self.op.instance_name
11979

    
11980
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11981
    exportlist = self.rpc.call_export_list(locked_nodes)
11982
    found = False
11983
    for node in exportlist:
11984
      msg = exportlist[node].fail_msg
11985
      if msg:
11986
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11987
        continue
11988
      if instance_name in exportlist[node].payload:
11989
        found = True
11990
        result = self.rpc.call_export_remove(node, instance_name)
11991
        msg = result.fail_msg
11992
        if msg:
11993
          logging.error("Could not remove export for instance %s"
11994
                        " on node %s: %s", instance_name, node, msg)
11995

    
11996
    if fqdn_warn and not found:
11997
      feedback_fn("Export not found. If trying to remove an export belonging"
11998
                  " to a deleted instance please use its Fully Qualified"
11999
                  " Domain Name.")
12000

    
12001

    
12002
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12017

    
12018
  def CheckPrereq(self):
12019
    """Check prerequisites.
12020

12021
    This checks that the given group name is not an existing node group
12022
    already.
12023

12024
    """
12025
    try:
12026
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12027
    except errors.OpPrereqError:
12028
      pass
12029
    else:
12030
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12031
                                 " node group (UUID: %s)" %
12032
                                 (self.op.group_name, existing_uuid),
12033
                                 errors.ECODE_EXISTS)
12034

    
12035
    if self.op.ndparams:
12036
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12037

    
12038
  def BuildHooksEnv(self):
12039
    """Build hooks env.
12040

12041
    """
12042
    return {
12043
      "GROUP_NAME": self.op.group_name,
12044
      }
12045

    
12046
  def BuildHooksNodes(self):
12047
    """Build hooks nodes.
12048

12049
    """
12050
    mn = self.cfg.GetMasterNode()
12051
    return ([mn], [mn])
12052

    
12053
  def Exec(self, feedback_fn):
12054
    """Add the node group to the cluster.
12055

12056
    """
12057
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12058
                                  uuid=self.group_uuid,
12059
                                  alloc_policy=self.op.alloc_policy,
12060
                                  ndparams=self.op.ndparams)
12061

    
12062
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12063
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12064

    
12065

    
12066
class LUGroupAssignNodes(NoHooksLU):
12067
  """Logical unit for assigning nodes to groups.
12068

12069
  """
12070
  REQ_BGL = False
12071

    
12072
  def ExpandNames(self):
12073
    # These raise errors.OpPrereqError on their own:
12074
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12075
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12076

    
12077
    # We want to lock all the affected nodes and groups. We have readily
12078
    # available the list of nodes, and the *destination* group. To gather the
12079
    # list of "source" groups, we need to fetch node information later on.
12080
    self.needed_locks = {
12081
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12082
      locking.LEVEL_NODE: self.op.nodes,
12083
      }
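    # The "source" groups of the affected nodes are added to the lock set in
    # DeclareLocks, once node information can be looked up.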
12084

    
12085
  def DeclareLocks(self, level):
12086
    if level == locking.LEVEL_NODEGROUP:
12087
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12088

    
12089
      # Try to get all affected nodes' groups without having the group or node
12090
      # lock yet. Needs verification later in the code flow.
12091
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12092

    
12093
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12094

    
12095
  def CheckPrereq(self):
12096
    """Check prerequisites.
12097

12098
    """
12099
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12100
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12101
            frozenset(self.op.nodes))
12102

    
12103
    expected_locks = (set([self.group_uuid]) |
12104
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12105
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12106
    if actual_locks != expected_locks:
12107
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12108
                               " current groups are '%s', used to be '%s'" %
12109
                               (utils.CommaJoin(expected_locks),
12110
                                utils.CommaJoin(actual_locks)))
12111

    
12112
    self.node_data = self.cfg.GetAllNodesInfo()
12113
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12114
    instance_data = self.cfg.GetAllInstancesInfo()
12115

    
12116
    if self.group is None:
12117
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12118
                               (self.op.group_name, self.group_uuid))
12119

    
12120
    (new_splits, previous_splits) = \
12121
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12122
                                             for node in self.op.nodes],
12123
                                            self.node_data, instance_data)
12124

    
12125
    if new_splits:
12126
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12127

    
12128
      if not self.op.force:
12129
        raise errors.OpExecError("The following instances get split by this"
12130
                                 " change and --force was not given: %s" %
12131
                                 fmt_new_splits)
12132
      else:
12133
        self.LogWarning("This operation will split the following instances: %s",
12134
                        fmt_new_splits)
12135

    
12136
        if previous_splits:
12137
          self.LogWarning("In addition, these already-split instances continue"
12138
                          " to be split across groups: %s",
12139
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12140

    
12141
  def Exec(self, feedback_fn):
12142
    """Assign nodes to a new group.
12143

12144
    """
12145
    for node in self.op.nodes:
12146
      self.node_data[node].group = self.group_uuid
12147

    
12148
    # FIXME: Depends on side-effects of modifying the result of
12149
    # C{cfg.GetAllNodesInfo}
12150

    
12151
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12152

    
12153
  @staticmethod
12154
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12155
    """Check for split instances after a node assignment.
12156

12157
    This method considers a series of node assignments as an atomic operation,
12158
    and returns information about split instances after applying the set of
12159
    changes.
12160

12161
    In particular, it returns information about newly split instances, and
12162
    instances that were already split, and remain so after the change.
12163

12164
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12165
    considered.
12166

12167
    @type changes: list of (node_name, new_group_uuid) pairs.
12168
    @param changes: list of node assignments to consider.
12169
    @param node_data: a dict with data for all nodes
12170
    @param instance_data: a dict with all instances to consider
12171
    @rtype: a two-tuple
12172
    @return: a list of instances that were previously healthy and become split
      as a consequence of this change, and a list of instances that were
      already split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


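# Illustrative note on LUGroupAssignNodes.CheckAssignmentForSplitInstances
# above (hypothetical names, not part of the original code): for a DRBD
# instance with both nodes currently in group A, reassigning only its primary
# node to group B makes it appear in the first (newly split) returned list;
# an instance already spanning two groups whose nodes all end up in the same
# group afterwards appears in neither list.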
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


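# Illustrative note on LUTagsSearch.Exec above (hypothetical values): the
# returned list contains (path, tag) pairs such as
# [("/instances/inst1.example.com", "important"), ("/nodes/node1", "rack1")],
# one entry per tag matching the pattern across the cluster, node, instance
# and nodegroup objects.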
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

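  # Illustrative sketch (hypothetical values, not part of the original code):
  # for an allocation the request built above ends up under the "request" key
  # of the input data roughly as
  #   {"type": "allocate", "name": "inst1.example.com", "memory": 1024,
  #    "vcpus": 1, "os": "debian-image", "disk_template": "drbd",
  #    "disks": [{"size": 10240, "mode": "rw"}], "nics": [...], "tags": [],
  #    "disk_space_total": ..., "required_nodes": 2, "hypervisor": "xen-pvm"}
  # with "type" filled in by _BuildInputData below.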
  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
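  # Example (hypothetical data): with node2group={"node1": "uuid-a"} and
  # groups={"uuid-a": {"name": "default"}}, _NodesToGroups(node2group, groups,
  # ["node1", "ghost"]) returns ["default"]; unknown nodes are skipped.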


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.
13526

13527
    """
13528
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13529
      for attr in ["memory", "disks", "disk_template",
13530
                   "os", "tags", "nics", "vcpus"]:
13531
        if not hasattr(self.op, attr):
13532
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13533
                                     attr, errors.ECODE_INVAL)
13534
      iname = self.cfg.ExpandInstanceName(self.op.name)
13535
      if iname is not None:
13536
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13537
                                   iname, errors.ECODE_EXISTS)
13538
      if not isinstance(self.op.nics, list):
13539
        raise errors.OpPrereqError("Invalid parameter 'nics'",
13540
                                   errors.ECODE_INVAL)
13541
      if not isinstance(self.op.disks, list):
13542
        raise errors.OpPrereqError("Invalid parameter 'disks'",
13543
                                   errors.ECODE_INVAL)
13544
      for row in self.op.disks:
13545
        if (not isinstance(row, dict) or
13546
            constants.IDISK_SIZE not in row or
13547
            not isinstance(row[constants.IDISK_SIZE], int) or
13548
            constants.IDISK_MODE not in row or
13549
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13550
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
13551
                                     " parameter", errors.ECODE_INVAL)
13552
      if self.op.hypervisor is None:
13553
        self.op.hypervisor = self.cfg.GetHypervisorType()
13554
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13555
      fname = _ExpandInstanceName(self.cfg, self.op.name)
13556
      self.op.name = fname
13557
      self.relocate_from = \
13558
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13559
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13560
                          constants.IALLOCATOR_MODE_NODE_EVAC):
13561
      if not self.op.instances:
13562
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13563
      self.op.instances = _GetWantedInstances(self, self.op.instances)
13564
    else:
13565
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13566
                                 self.op.mode, errors.ECODE_INVAL)
13567

    
13568
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13569
      if self.op.allocator is None:
13570
        raise errors.OpPrereqError("Missing allocator name",
13571
                                   errors.ECODE_INVAL)
13572
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13573
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
13574
                                 self.op.direction, errors.ECODE_INVAL)
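
  # For reference, a hypothetical parameter set that satisfies the ALLOC-mode
  # checks above (values are illustrative, not taken from a real cluster):
  #
  #   name="inst1.example.com" (must not resolve to an existing instance),
  #   memory=1024, vcpus=1, os="debian-image", tags=[], nics=[],
  #   disks=[{constants.IDISK_SIZE: 10240,
  #           constants.IDISK_MODE: constants.DISK_RDWR}],
  #   disk_template=constants.DT_PLAIN
  #
  # Every disk entry must be a dict carrying an integer IDISK_SIZE and an
  # IDISK_MODE contained in constants.DISK_ACCESS_SET; a hypervisor left at
  # None defaults to the cluster's hypervisor type.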

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
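
  # Illustrative summary of the code above: with direction IALLOCATOR_DIR_IN
  # the LU only builds and returns the serialized allocator request
  # (ial.in_text) without invoking any allocator script, while with
  # IALLOCATOR_DIR_OUT it runs the allocator named in self.op.allocator and
  # returns the script's raw response (ial.out_text), with result validation
  # disabled (validate=False) so the unmodified answer can be inspected.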


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
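
# A brief usage sketch (illustrative only): _GetQueryImplementation resolves a
# query resource name to the class implementing it, e.g.
#
#   _GetQueryImplementation(constants.QR_NODE)      => _NodeQuery
#   _GetQueryImplementation(constants.QR_INSTANCE)  => _InstanceQuery
#   _GetQueryImplementation("does-not-exist")       raises OpPrereqError
#
# The assert above guarantees that every resource in constants.QR_VIA_OP has
# an entry in _QUERY_IMPL, so only names outside that set reach the error
# path.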