#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# States of instance
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


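# Editor's illustrative sketch (not part of the original module): an LU's
# Exec() can hand follow-up jobs back to mcpu by returning a ResultWithJobs
# (defined above). Each inner list becomes one job; extra keyword arguments
# are passed through in the opcode result. The surrounding LU is hypothetical.
#
#   def Exec(self, feedback_fn):
#     jobs = [
#       [opcodes.OpTestDelay(duration=1)],   # first job, one opcode
#       [opcodes.OpTestDelay(duration=2)],   # second job
#       ]
#     return ResultWithJobs(jobs, comment="follow-up work submitted")
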
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods need not worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused argument and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


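# Editor's illustrative sketch (not part of the original module): a minimal
# concurrent LU following the LogicalUnit contract described above. The
# opcode name and hook path used here are hypothetical.
#
#   class LUExampleNoop(LogicalUnit):
#     HPATH = "example-noop"
#     HTYPE = constants.HTYPE_CLUSTER
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.needed_locks = {}          # this LU needs no locks
#
#     def BuildHooksEnv(self):
#       return {"OP_TARGET": self.cfg.GetClusterName()}
#
#     def BuildHooksNodes(self):
#       return ([], [self.cfg.GetMasterNode()])
#
#     def CheckPrereq(self):
#       pass                            # nothing to verify
#
#     def Exec(self, feedback_fn):
#       feedback_fn("* doing nothing")
#       return True
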
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


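# Editor's illustrative sketch (not part of the original module): an LU that
# delegates its work to tasklets (see the Tasklet class above) assigns them
# in ExpandNames; the default LogicalUnit.CheckPrereq/Exec then iterate over
# them. The tasklet class name is hypothetical.
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.tasklets = [_ExampleInstanceTasklet(self, self.op.instance_name)]
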
class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


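# Editor's illustrative sketch (not part of the original module): concrete
# query helpers subclass _QueryBase above, point FIELDS at a field definition
# set from the query module and implement the three abstract methods. The
# class below is hypothetical and heavily simplified.
#
#   class _ExampleNodeQuery(_QueryBase):
#     FIELDS = query.NODE_FIELDS
#
#     def ExpandNames(self, lu):
#       lu.needed_locks = {}
#       self.wanted = self.names or locking.ALL_SET
#       self.do_locking = self.use_locking
#
#     def DeclareLocks(self, lu, level):
#       pass
#
#     def _GetQueryData(self, lu):
#       # gather and return the query data object for self.wanted here
#       raise NotImplementedError()
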
def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _MakeLegacyNodeInfo(data):
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.

  Converts the data into a single dictionary. This is fine for most use cases,
  but some require information from more than one volume group or hypervisor.

  """
  (bootid, (vg_info, ), (hv_info, )) = data

  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
    "bootid": bootid,
    })


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


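# Editor's illustrative sketch (not part of the original module): how
# _GetUpdatedParams above merges a parameter update. The dictionaries are
# made-up values, not real hypervisor parameters.
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
#   upd = {"root_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # -> {"kernel_path": "/boot/vmlinuz", "serial_console": True}
#   # "root_path" was reset to its default, i.e. removed from the dict.
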
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


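# Editor's illustrative sketch (not part of the original module): typical use
# of _ReleaseLocks above inside an LU, dropping the node locks that are no
# longer needed once the target nodes are known. The attribute names are
# hypothetical.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.target_node, self.other_node])
#   # or release every lock held at that level:
#   _ReleaseLocks(self, locking.LEVEL_NODE)
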
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          minmem, maxmem, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type minmem: string
  @param minmem: the minimum memory size of the instance
  @type maxmem: string
  @param maxmem: the maximum memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MINMEM": minmem,
    "INSTANCE_MAXMEM": maxmem,
    # TODO(2.7) remove deprecated "memory" value
    "INSTANCE_MEMORY": maxmem,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }
  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


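# Editor's illustrative sketch (not part of the original module): for a
# one-NIC, one-disk instance the environment built by _BuildInstanceHookEnv
# above contains entries along these lines (values are made up; the hooks
# runner later prefixes every key with "GANETI_"):
#
#   {
#     "INSTANCE_NAME": "instance1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MAC": "aa:00:00:12:34:56",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     ...
#   }
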
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "maxmem": bep[constants.BE_MAXMEM],
    "minmem": bep[constants.BE_MINMEM],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


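# Editor's illustrative sketch (not part of the original module): LUs that
# accept either an iallocator or an explicit target node typically call
# _CheckIAllocatorOrNode above from CheckArguments; the slot names shown
# here are only examples.
#
#   def CheckArguments(self):
#     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
#     # afterwards at most one of self.op.iallocator and self.op.remote_node
#     # is set, with the iallocator possibly filled in from the cluster-wide
#     # default.
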
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


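# Editor's illustrative sketch (not part of the original module): inside the
# Exec method of a verify LU, the _VerifyErrors mix-in above is typically
# used like this (the condition, error code and message are only examples):
#
#   self._ErrorIf(test, constants.CV_ENODESSH, node,
#                 "ssh communication with node '%s' failed", node)
#   # the message is only emitted (and self.bad possibly set) if "test" is
#   # true or error simulation is enabled.
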
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


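# A rough sketch of the job list built by LUClusterVerify.Exec when no group
# name is given (group names are invented): the configuration check is
# submitted first and every per-group job points back at it through a
# relative dependency, whose offset grows as jobs are appended:
#
#   jobs = [
#     [opcodes.OpClusterVerifyConfig(ignore_errors=[])],
#     [opcodes.OpClusterVerifyGroup(group_name="default",
#                                   depends=[(-1, [])])],
#     [opcodes.OpClusterVerifyGroup(group_name="rack2",
#                                   depends=[(-2, [])])],
#   ]
#
# The second element of each dependency tuple is the list of required final
# job statuses, left empty here.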
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node,
         utils.CommaJoin(dangling_instances.get(node, ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1672
  """Verifies the status of a node group.
1673

1674
  """
1675
  HPATH = "cluster-verify"
1676
  HTYPE = constants.HTYPE_CLUSTER
1677
  REQ_BGL = False
1678

    
1679
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1680

    
1681
  class NodeImage(object):
1682
    """A class representing the logical and physical status of a node.
1683

1684
    @type name: string
1685
    @ivar name: the node name to which this object refers
1686
    @ivar volumes: a structure as returned from
1687
        L{ganeti.backend.GetVolumeList} (runtime)
1688
    @ivar instances: a list of running instances (runtime)
1689
    @ivar pinst: list of configured primary instances (config)
1690
    @ivar sinst: list of configured secondary instances (config)
1691
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1692
        instances for which this node is secondary (config)
1693
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1694
    @ivar dfree: free disk, as reported by the node (runtime)
1695
    @ivar offline: the offline status (config)
1696
    @type rpc_fail: boolean
1697
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1698
        not whether the individual keys were correct) (runtime)
1699
    @type lvm_fail: boolean
1700
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1701
    @type hyp_fail: boolean
1702
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1703
    @type ghost: boolean
1704
    @ivar ghost: whether this is a known node or not (config)
1705
    @type os_fail: boolean
1706
    @ivar os_fail: whether the RPC call didn't return valid OS data
1707
    @type oslist: list
1708
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1709
    @type vm_capable: boolean
1710
    @ivar vm_capable: whether the node can host instances
1711

1712
    """
1713
    def __init__(self, offline=False, name=None, vm_capable=True):
1714
      self.name = name
1715
      self.volumes = {}
1716
      self.instances = []
1717
      self.pinst = []
1718
      self.sinst = []
1719
      self.sbp = {}
1720
      self.mfree = 0
1721
      self.dfree = 0
1722
      self.offline = offline
1723
      self.vm_capable = vm_capable
1724
      self.rpc_fail = False
1725
      self.lvm_fail = False
1726
      self.hyp_fail = False
1727
      self.ghost = False
1728
      self.os_fail = False
1729
      self.oslist = {}
1730

    
1731
  def ExpandNames(self):
1732
    # This raises errors.OpPrereqError on its own:
1733
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1734

    
1735
    # Get instances in node group; this is unsafe and needs verification later
1736
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1737

    
1738
    self.needed_locks = {
1739
      locking.LEVEL_INSTANCE: inst_names,
1740
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1741
      locking.LEVEL_NODE: [],
1742
      }
1743

    
1744
    self.share_locks = _ShareAll()
1745

    
1746
  def DeclareLocks(self, level):
1747
    if level == locking.LEVEL_NODE:
1748
      # Get members of node group; this is unsafe and needs verification later
1749
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1750

    
1751
      all_inst_info = self.cfg.GetAllInstancesInfo()
1752

    
1753
      # In Exec(), we warn about mirrored instances that have primary and
1754
      # secondary living in separate node groups. To fully verify that
1755
      # volumes for these instances are healthy, we will need to do an
1756
      # extra call to their secondaries. We ensure here those nodes will
1757
      # be locked.
1758
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1759
        # Important: access only the instances whose lock is owned
1760
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1761
          nodes.update(all_inst_info[inst].secondary_nodes)
1762

    
1763
      self.needed_locks[locking.LEVEL_NODE] = nodes
1764

    
1765
  def CheckPrereq(self):
1766
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1767
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1768

    
1769
    group_nodes = set(self.group_info.members)
1770
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1771

    
1772
    unlocked_nodes = \
1773
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1774

    
1775
    unlocked_instances = \
1776
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1777

    
1778
    if unlocked_nodes:
1779
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1780
                                 utils.CommaJoin(unlocked_nodes))
1781

    
1782
    if unlocked_instances:
1783
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1784
                                 utils.CommaJoin(unlocked_instances))
1785

    
1786
    self.all_node_info = self.cfg.GetAllNodesInfo()
1787
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1788

    
1789
    self.my_node_names = utils.NiceSort(group_nodes)
1790
    self.my_inst_names = utils.NiceSort(group_instances)
1791

    
1792
    self.my_node_info = dict((name, self.all_node_info[name])
1793
                             for name in self.my_node_names)
1794

    
1795
    self.my_inst_info = dict((name, self.all_inst_info[name])
1796
                             for name in self.my_inst_names)
1797

    
1798
    # We detect here the nodes that will need the extra RPC calls for verifying
1799
    # split LV volumes; they should be locked.
1800
    extra_lv_nodes = set()
1801

    
1802
    for inst in self.my_inst_info.values():
1803
      if inst.disk_template in constants.DTS_INT_MIRROR:
1804
        group = self.my_node_info[inst.primary_node].group
1805
        for nname in inst.secondary_nodes:
1806
          if self.all_node_info[nname].group != group:
1807
            extra_lv_nodes.add(nname)
1808

    
1809
    unlocked_lv_nodes = \
1810
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1811

    
1812
    if unlocked_lv_nodes:
1813
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1814
                                 utils.CommaJoin(unlocked_lv_nodes))
1815
    self.extra_lv_nodes = list(extra_lv_nodes)
1816

    
1817
  def _VerifyNode(self, ninfo, nresult):
1818
    """Perform some basic validation on data returned from a node.
1819

1820
      - check the result data structure is well formed and has all the
1821
        mandatory fields
1822
      - check ganeti version
1823

1824
    @type ninfo: L{objects.Node}
1825
    @param ninfo: the node to check
1826
    @param nresult: the results from the node
1827
    @rtype: boolean
1828
    @return: whether overall this call was successful (and we can expect
1829
         reasonable values in the response)
1830

1831
    """
1832
    node = ninfo.name
1833
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1834

    
1835
    # main result, nresult should be a non-empty dict
1836
    test = not nresult or not isinstance(nresult, dict)
1837
    _ErrorIf(test, constants.CV_ENODERPC, node,
1838
                  "unable to verify node: no data returned")
1839
    if test:
1840
      return False
1841

    
1842
    # compares ganeti version
1843
    local_version = constants.PROTOCOL_VERSION
1844
    remote_version = nresult.get("version", None)
1845
    test = not (remote_version and
1846
                isinstance(remote_version, (list, tuple)) and
1847
                len(remote_version) == 2)
1848
    _ErrorIf(test, constants.CV_ENODERPC, node,
1849
             "connection to node returned invalid data")
1850
    if test:
1851
      return False
1852

    
1853
    test = local_version != remote_version[0]
1854
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1855
             "incompatible protocol versions: master %s,"
1856
             " node %s", local_version, remote_version[0])
1857
    if test:
1858
      return False
1859

    
1860
    # node seems compatible, we can actually try to look into its results
1861

    
1862
    # full package version
1863
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1864
                  constants.CV_ENODEVERSION, node,
1865
                  "software version mismatch: master %s, node %s",
1866
                  constants.RELEASE_VERSION, remote_version[1],
1867
                  code=self.ETYPE_WARNING)
1868

    
1869
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1870
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1871
      for hv_name, hv_result in hyp_result.iteritems():
1872
        test = hv_result is not None
1873
        _ErrorIf(test, constants.CV_ENODEHV, node,
1874
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1875

    
1876
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1877
    if ninfo.vm_capable and isinstance(hvp_result, list):
1878
      for item, hv_name, hv_result in hvp_result:
1879
        _ErrorIf(True, constants.CV_ENODEHV, node,
1880
                 "hypervisor %s parameter verify failure (source %s): %s",
1881
                 hv_name, item, hv_result)
1882

    
1883
    test = nresult.get(constants.NV_NODESETUP,
1884
                       ["Missing NODESETUP results"])
1885
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1886
             "; ".join(test))
1887

    
1888
    return True
1889

    
1890
  def _VerifyNodeTime(self, ninfo, nresult,
1891
                      nvinfo_starttime, nvinfo_endtime):
1892
    """Check the node time.
1893

1894
    @type ninfo: L{objects.Node}
1895
    @param ninfo: the node to check
1896
    @param nresult: the remote results for the node
1897
    @param nvinfo_starttime: the start time of the RPC call
1898
    @param nvinfo_endtime: the end time of the RPC call
1899

1900
    """
1901
    node = ninfo.name
1902
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1903

    
1904
    ntime = nresult.get(constants.NV_TIME, None)
1905
    try:
1906
      ntime_merged = utils.MergeTime(ntime)
1907
    except (ValueError, TypeError):
1908
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1909
      return
1910

    
1911
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1912
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1913
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1914
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1915
    else:
1916
      ntime_diff = None
1917

    
1918
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1919
             "Node time diverges by at least %s from master node time",
1920
             ntime_diff)
1921
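  # Illustrative numbers for the skew window checked above (all values are
  # hypothetical, assuming a NODE_MAX_CLOCK_SKEW of 150 seconds): if the
  # verify RPC ran between nvinfo_starttime = 1000.0 and nvinfo_endtime =
  # 1002.0, any merged node time inside [850.0, 1152.0] is accepted; a node
  # reporting NV_TIME = (1200, 500000), i.e. 1200.5 after MergeTime, is
  # flagged with CV_ENODETIME.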

    
1922
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1923
    """Check the node LVM results.
1924

1925
    @type ninfo: L{objects.Node}
1926
    @param ninfo: the node to check
1927
    @param nresult: the remote results for the node
1928
    @param vg_name: the configured VG name
1929

1930
    """
1931
    if vg_name is None:
1932
      return
1933

    
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936

    
1937
    # checks vg existence and size > 20G
1938
    vglist = nresult.get(constants.NV_VGLIST, None)
1939
    test = not vglist
1940
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1941
    if not test:
1942
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1943
                                            constants.MIN_VG_SIZE)
1944
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1945

    
1946
    # check pv names
1947
    pvlist = nresult.get(constants.NV_PVLIST, None)
1948
    test = pvlist is None
1949
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1950
    if not test:
1951
      # check that ':' is not present in PV names, since it's a
1952
      # special character for lvcreate (denotes the range of PEs to
1953
      # use on the PV)
1954
      for _, pvname, owner_vg in pvlist:
1955
        test = ":" in pvname
1956
        _ErrorIf(test, constants.CV_ENODELVM, node,
1957
                 "Invalid character ':' in PV '%s' of VG '%s'",
1958
                 pvname, owner_vg)
1959

    
1960
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1961
    """Check the node bridges.
1962

1963
    @type ninfo: L{objects.Node}
1964
    @param ninfo: the node to check
1965
    @param nresult: the remote results for the node
1966
    @param bridges: the expected list of bridges
1967

1968
    """
1969
    if not bridges:
1970
      return
1971

    
1972
    node = ninfo.name
1973
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1974

    
1975
    missing = nresult.get(constants.NV_BRIDGES, None)
1976
    test = not isinstance(missing, list)
1977
    _ErrorIf(test, constants.CV_ENODENET, node,
1978
             "did not return valid bridge information")
1979
    if not test:
1980
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1981
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1982

    
1983
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1984
    """Check the results of user scripts presence and executability on the node
1985

1986
    @type ninfo: L{objects.Node}
1987
    @param ninfo: the node to check
1988
    @param nresult: the remote results for the node
1989

1990
    """
1991
    node = ninfo.name
1992

    
1993
    test = not constants.NV_USERSCRIPTS in nresult
1994
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1995
                  "did not return user scripts information")
1996

    
1997
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1998
    if not test:
1999
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2000
                    "user scripts not present or not executable: %s" %
2001
                    utils.CommaJoin(sorted(broken_scripts)))
2002

    
2003
  def _VerifyNodeNetwork(self, ninfo, nresult):
2004
    """Check the node network connectivity results.
2005

2006
    @type ninfo: L{objects.Node}
2007
    @param ninfo: the node to check
2008
    @param nresult: the remote results for the node
2009

2010
    """
2011
    node = ninfo.name
2012
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2013

    
2014
    test = constants.NV_NODELIST not in nresult
2015
    _ErrorIf(test, constants.CV_ENODESSH, node,
2016
             "node hasn't returned node ssh connectivity data")
2017
    if not test:
2018
      if nresult[constants.NV_NODELIST]:
2019
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2020
          _ErrorIf(True, constants.CV_ENODESSH, node,
2021
                   "ssh communication with node '%s': %s", a_node, a_msg)
2022

    
2023
    test = constants.NV_NODENETTEST not in nresult
2024
    _ErrorIf(test, constants.CV_ENODENET, node,
2025
             "node hasn't returned node tcp connectivity data")
2026
    if not test:
2027
      if nresult[constants.NV_NODENETTEST]:
2028
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2029
        for anode in nlist:
2030
          _ErrorIf(True, constants.CV_ENODENET, node,
2031
                   "tcp communication with node '%s': %s",
2032
                   anode, nresult[constants.NV_NODENETTEST][anode])
2033

    
2034
    test = constants.NV_MASTERIP not in nresult
2035
    _ErrorIf(test, constants.CV_ENODENET, node,
2036
             "node hasn't returned node master IP reachability data")
2037
    if not test:
2038
      if not nresult[constants.NV_MASTERIP]:
2039
        if node == self.master_node:
2040
          msg = "the master node cannot reach the master IP (not configured?)"
2041
        else:
2042
          msg = "cannot reach the master IP"
2043
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2044

    
2045
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2046
                      diskstatus):
2047
    """Verify an instance.
2048

2049
    This function checks to see if the required block devices are
2050
    available on the instance's node.
2051

2052
    """
2053
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2054
    node_current = instanceconfig.primary_node
2055

    
2056
    node_vol_should = {}
2057
    instanceconfig.MapLVsByNode(node_vol_should)
2058

    
2059
    for node in node_vol_should:
2060
      n_img = node_image[node]
2061
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2062
        # ignore missing volumes on offline or broken nodes
2063
        continue
2064
      for volume in node_vol_should[node]:
2065
        test = volume not in n_img.volumes
2066
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2067
                 "volume %s missing on node %s", volume, node)
2068

    
2069
    if instanceconfig.admin_state == constants.ADMINST_UP:
2070
      pri_img = node_image[node_current]
2071
      test = instance not in pri_img.instances and not pri_img.offline
2072
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2073
               "instance not running on its primary node %s",
2074
               node_current)
2075

    
2076
    diskdata = [(nname, success, status, idx)
2077
                for (nname, disks) in diskstatus.items()
2078
                for idx, (success, status) in enumerate(disks)]
2079

    
2080
    for nname, success, bdev_status, idx in diskdata:
2081
      # the 'ghost node' construction in Exec() ensures that we have a
2082
      # node here
2083
      snode = node_image[nname]
2084
      bad_snode = snode.ghost or snode.offline
2085
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2086
               not success and not bad_snode,
2087
               constants.CV_EINSTANCEFAULTYDISK, instance,
2088
               "couldn't retrieve status for disk/%s on %s: %s",
2089
               idx, nname, bdev_status)
2090
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2091
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2092
               constants.CV_EINSTANCEFAULTYDISK, instance,
2093
               "disk/%s on %s is faulty", idx, nname)
2094

    
2095
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2096
    """Verify if there are any unknown volumes in the cluster.
2097

2098
    The .os, .swap and backup volumes are ignored. All other volumes are
2099
    reported as unknown.
2100

2101
    @type reserved: L{ganeti.utils.FieldSet}
2102
    @param reserved: a FieldSet of reserved volume names
2103

2104
    """
2105
    for node, n_img in node_image.items():
2106
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2107
        # skip non-healthy nodes
2108
        continue
2109
      for volume in n_img.volumes:
2110
        test = ((node not in node_vol_should or
2111
                volume not in node_vol_should[node]) and
2112
                not reserved.Matches(volume))
2113
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2114
                      "volume %s is unknown", volume)
2115

    
2116
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2117
    """Verify N+1 Memory Resilience.
2118

2119
    Check that if one single node dies we can still start all the
2120
    instances it was primary for.
2121

2122
    """
2123
    cluster_info = self.cfg.GetClusterInfo()
2124
    for node, n_img in node_image.items():
2125
      # This code checks that every node which is now listed as
2126
      # secondary has enough memory to host all instances it is
2127
      # supposed to host, should a single other node in the cluster fail.
2128
      # FIXME: not ready for failover to an arbitrary node
2129
      # FIXME: does not support file-backed instances
2130
      # WARNING: we currently take into account down instances as well
2131
      # as up ones, considering that even if they're down someone
2132
      # might want to start them even in the event of a node failure.
2133
      if n_img.offline:
2134
        # we're skipping offline nodes from the N+1 warning, since
2135
        # most likely we don't have good memory information from them;
2136
        # we already list instances living on such nodes, and that's
2137
        # enough warning
2138
        continue
2139
      #TODO(dynmem): use MINMEM for checking
2140
      #TODO(dynmem): also consider ballooning out other instances
2141
      for prinode, instances in n_img.sbp.items():
2142
        needed_mem = 0
2143
        for instance in instances:
2144
          bep = cluster_info.FillBE(instance_cfg[instance])
2145
          if bep[constants.BE_AUTO_BALANCE]:
2146
            needed_mem += bep[constants.BE_MAXMEM]
2147
        test = n_img.mfree < needed_mem
2148
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2149
                      "not enough memory to accomodate instance failovers"
2150
                      " should node %s fail (%dMiB needed, %dMiB available)",
2151
                      prinode, needed_mem, n_img.mfree)
2152
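  # A small worked example of the N+1 check above (hypothetical figures): if
  # this node is secondary for two auto-balanced instances whose primary is
  # prinode, with BE_MAXMEM of 2048 and 4096 MiB, then needed_mem is
  # 6144 MiB; an mfree of 4096 MiB would therefore raise CV_ENODEN1, since
  # the node could not absorb prinode's instances should prinode fail.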

    
2153
  @classmethod
2154
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2155
                   (files_all, files_opt, files_mc, files_vm)):
2156
    """Verifies file checksums collected from all nodes.
2157

2158
    @param errorif: Callback for reporting errors
2159
    @param nodeinfo: List of L{objects.Node} objects
2160
    @param master_node: Name of master node
2161
    @param all_nvinfo: RPC results
2162

2163
    """
2164
    # Define functions determining which nodes to consider for a file
2165
    files2nodefn = [
2166
      (files_all, None),
2167
      (files_mc, lambda node: (node.master_candidate or
2168
                               node.name == master_node)),
2169
      (files_vm, lambda node: node.vm_capable),
2170
      ]
2171

    
2172
    # Build mapping from filename to list of nodes which should have the file
2173
    nodefiles = {}
2174
    for (files, fn) in files2nodefn:
2175
      if fn is None:
2176
        filenodes = nodeinfo
2177
      else:
2178
        filenodes = filter(fn, nodeinfo)
2179
      nodefiles.update((filename,
2180
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2181
                       for filename in files)
2182

    
2183
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2184

    
2185
    fileinfo = dict((filename, {}) for filename in nodefiles)
2186
    ignore_nodes = set()
2187

    
2188
    for node in nodeinfo:
2189
      if node.offline:
2190
        ignore_nodes.add(node.name)
2191
        continue
2192

    
2193
      nresult = all_nvinfo[node.name]
2194

    
2195
      if nresult.fail_msg or not nresult.payload:
2196
        node_files = None
2197
      else:
2198
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2199

    
2200
      test = not (node_files and isinstance(node_files, dict))
2201
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2202
              "Node did not return file checksum data")
2203
      if test:
2204
        ignore_nodes.add(node.name)
2205
        continue
2206

    
2207
      # Build per-checksum mapping from filename to nodes having it
2208
      for (filename, checksum) in node_files.items():
2209
        assert filename in nodefiles
2210
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2211

    
2212
    for (filename, checksums) in fileinfo.items():
2213
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2214

    
2215
      # Nodes having the file
2216
      with_file = frozenset(node_name
2217
                            for nodes in fileinfo[filename].values()
2218
                            for node_name in nodes) - ignore_nodes
2219

    
2220
      expected_nodes = nodefiles[filename] - ignore_nodes
2221

    
2222
      # Nodes missing file
2223
      missing_file = expected_nodes - with_file
2224

    
2225
      if filename in files_opt:
2226
        # All or no nodes
2227
        errorif(missing_file and missing_file != expected_nodes,
2228
                constants.CV_ECLUSTERFILECHECK, None,
2229
                "File %s is optional, but it must exist on all or no"
2230
                " nodes (not found on %s)",
2231
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2232
      else:
2233
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2234
                "File %s is missing from node(s) %s", filename,
2235
                utils.CommaJoin(utils.NiceSort(missing_file)))
2236

    
2237
        # Warn if a node has a file it shouldn't
2238
        unexpected = with_file - expected_nodes
2239
        errorif(unexpected,
2240
                constants.CV_ECLUSTERFILECHECK, None,
2241
                "File %s should not exist on node(s) %s",
2242
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2243

    
2244
      # See if there are multiple versions of the file
2245
      test = len(checksums) > 1
2246
      if test:
2247
        variants = ["variant %s on %s" %
2248
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2249
                    for (idx, (checksum, nodes)) in
2250
                      enumerate(sorted(checksums.items()))]
2251
      else:
2252
        variants = []
2253

    
2254
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2255
              "File %s found with %s different checksums (%s)",
2256
              filename, len(checksums), "; ".join(variants))
2257

    
2258
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2259
                      drbd_map):
2260
    """Verifies and the node DRBD status.
2261

2262
    @type ninfo: L{objects.Node}
2263
    @param ninfo: the node to check
2264
    @param nresult: the remote results for the node
2265
    @param instanceinfo: the dict of instances
2266
    @param drbd_helper: the configured DRBD usermode helper
2267
    @param drbd_map: the DRBD map as returned by
2268
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2269

2270
    """
2271
    node = ninfo.name
2272
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273

    
2274
    if drbd_helper:
2275
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2276
      test = (helper_result is None)
2277
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2278
               "no drbd usermode helper returned")
2279
      if helper_result:
2280
        status, payload = helper_result
2281
        test = not status
2282
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2283
                 "drbd usermode helper check unsuccessful: %s", payload)
2284
        test = status and (payload != drbd_helper)
2285
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2286
                 "wrong drbd usermode helper: %s", payload)
2287

    
2288
    # compute the DRBD minors
2289
    node_drbd = {}
2290
    for minor, instance in drbd_map[node].items():
2291
      test = instance not in instanceinfo
2292
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2293
               "ghost instance '%s' in temporary DRBD map", instance)
2294
        # ghost instance should not be running, but otherwise we
2295
        # don't give double warnings (both ghost instance and
2296
        # unallocated minor in use)
2297
      if test:
2298
        node_drbd[minor] = (instance, False)
2299
      else:
2300
        instance = instanceinfo[instance]
2301
        node_drbd[minor] = (instance.name,
2302
                            instance.admin_state == constants.ADMINST_UP)
2303

    
2304
    # and now check them
2305
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2306
    test = not isinstance(used_minors, (tuple, list))
2307
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2308
             "cannot parse drbd status file: %s", str(used_minors))
2309
    if test:
2310
      # we cannot check drbd status
2311
      return
2312

    
2313
    for minor, (iname, must_exist) in node_drbd.items():
2314
      test = minor not in used_minors and must_exist
2315
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2316
               "drbd minor %d of instance %s is not active", minor, iname)
2317
    for minor in used_minors:
2318
      test = minor not in node_drbd
2319
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2320
               "unallocated drbd minor %d is in use", minor)
2321
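  # Sketch of the data handled by _VerifyNodeDrbd above, with made-up names:
  # ComputeDRBDMap yields per-node minor assignments such as
  #   drbd_map["node1.example.com"] == {0: "web1.example.com",
  #                                     1: "db1.example.com"}
  # which becomes
  #   node_drbd == {0: ("web1.example.com", True),
  #                 1: ("db1.example.com", True)}
  # (the boolean meaning the instance is administratively up, so its minor
  # must be active). Against a reported NV_DRBDLIST of [0, 7], minor 0 is
  # fine, minor 1 is flagged as not active and minor 7 as unallocated but
  # in use.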

    
2322
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2323
    """Builds the node OS structures.
2324

2325
    @type ninfo: L{objects.Node}
2326
    @param ninfo: the node to check
2327
    @param nresult: the remote results for the node
2328
    @param nimg: the node image object
2329

2330
    """
2331
    node = ninfo.name
2332
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2333

    
2334
    remote_os = nresult.get(constants.NV_OSLIST, None)
2335
    test = (not isinstance(remote_os, list) or
2336
            not compat.all(isinstance(v, list) and len(v) == 7
2337
                           for v in remote_os))
2338

    
2339
    _ErrorIf(test, constants.CV_ENODEOS, node,
2340
             "node hasn't returned valid OS data")
2341

    
2342
    nimg.os_fail = test
2343

    
2344
    if test:
2345
      return
2346

    
2347
    os_dict = {}
2348

    
2349
    for (name, os_path, status, diagnose,
2350
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2351

    
2352
      if name not in os_dict:
2353
        os_dict[name] = []
2354

    
2355
      # parameters is a list of lists instead of list of tuples due to
2356
      # JSON lacking a real tuple type, fix it:
2357
      parameters = [tuple(v) for v in parameters]
2358
      os_dict[name].append((os_path, status, diagnose,
2359
                            set(variants), set(parameters), set(api_ver)))
2360

    
2361
    nimg.oslist = os_dict
2362

    
2363
  def _VerifyNodeOS(self, ninfo, nimg, base):
2364
    """Verifies the node OS list.
2365

2366
    @type ninfo: L{objects.Node}
2367
    @param ninfo: the node to check
2368
    @param nimg: the node image object
2369
    @param base: the 'template' node we match against (e.g. from the master)
2370

2371
    """
2372
    node = ninfo.name
2373
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374

    
2375
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2376

    
2377
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2378
    for os_name, os_data in nimg.oslist.items():
2379
      assert os_data, "Empty OS status for OS %s?!" % os_name
2380
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2381
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2382
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2383
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2384
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2385
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2386
      # comparisons with the 'base' image
2387
      test = os_name not in base.oslist
2388
      _ErrorIf(test, constants.CV_ENODEOS, node,
2389
               "Extra OS %s not present on reference node (%s)",
2390
               os_name, base.name)
2391
      if test:
2392
        continue
2393
      assert base.oslist[os_name], "Base node has empty OS status?"
2394
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2395
      if not b_status:
2396
        # base OS is invalid, skipping
2397
        continue
2398
      for kind, a, b in [("API version", f_api, b_api),
2399
                         ("variants list", f_var, b_var),
2400
                         ("parameters", beautify_params(f_param),
2401
                          beautify_params(b_param))]:
2402
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2403
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2404
                 kind, os_name, base.name,
2405
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2406

    
2407
    # check any missing OSes
2408
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2409
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2410
             "OSes present on reference node %s but missing on this node: %s",
2411
             base.name, utils.CommaJoin(missing))
2412

    
2413
  def _VerifyOob(self, ninfo, nresult):
2414
    """Verifies out of band functionality of a node.
2415

2416
    @type ninfo: L{objects.Node}
2417
    @param ninfo: the node to check
2418
    @param nresult: the remote results for the node
2419

2420
    """
2421
    node = ninfo.name
2422
    # We just have to verify the paths on master and/or master candidates
2423
    # as the oob helper is invoked on the master
2424
    if ((ninfo.master_candidate or ninfo.master_capable) and
2425
        constants.NV_OOB_PATHS in nresult):
2426
      for path_result in nresult[constants.NV_OOB_PATHS]:
2427
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2428

    
2429
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2430
    """Verifies and updates the node volume data.
2431

2432
    This function will update a L{NodeImage}'s internal structures
2433
    with data from the remote call.
2434

2435
    @type ninfo: L{objects.Node}
2436
    @param ninfo: the node to check
2437
    @param nresult: the remote results for the node
2438
    @param nimg: the node image object
2439
    @param vg_name: the configured VG name
2440

2441
    """
2442
    node = ninfo.name
2443
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444

    
2445
    nimg.lvm_fail = True
2446
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2447
    if vg_name is None:
2448
      pass
2449
    elif isinstance(lvdata, basestring):
2450
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2451
               utils.SafeEncode(lvdata))
2452
    elif not isinstance(lvdata, dict):
2453
      _ErrorIf(True, constants.CV_ENODELVM, node,
2454
               "rpc call to node failed (lvlist)")
2455
    else:
2456
      nimg.volumes = lvdata
2457
      nimg.lvm_fail = False
2458

    
2459
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2460
    """Verifies and updates the node instance list.
2461

2462
    If the listing was successful, then updates this node's instance
2463
    list. Otherwise, it marks the RPC call as failed for the instance
2464
    list key.
2465

2466
    @type ninfo: L{objects.Node}
2467
    @param ninfo: the node to check
2468
    @param nresult: the remote results for the node
2469
    @param nimg: the node image object
2470

2471
    """
2472
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2473
    test = not isinstance(idata, list)
2474
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2475
                  "rpc call to node failed (instancelist): %s",
2476
                  utils.SafeEncode(str(idata)))
2477
    if test:
2478
      nimg.hyp_fail = True
2479
    else:
2480
      nimg.instances = idata
2481

    
2482
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2483
    """Verifies and computes a node information map
2484

2485
    @type ninfo: L{objects.Node}
2486
    @param ninfo: the node to check
2487
    @param nresult: the remote results for the node
2488
    @param nimg: the node image object
2489
    @param vg_name: the configured VG name
2490

2491
    """
2492
    node = ninfo.name
2493
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2494

    
2495
    # try to read free memory (from the hypervisor)
2496
    hv_info = nresult.get(constants.NV_HVINFO, None)
2497
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2498
    _ErrorIf(test, constants.CV_ENODEHV, node,
2499
             "rpc call to node failed (hvinfo)")
2500
    if not test:
2501
      try:
2502
        nimg.mfree = int(hv_info["memory_free"])
2503
      except (ValueError, TypeError):
2504
        _ErrorIf(True, constants.CV_ENODERPC, node,
2505
                 "node returned invalid nodeinfo, check hypervisor")
2506

    
2507
    # FIXME: devise a free space model for file based instances as well
2508
    if vg_name is not None:
2509
      test = (constants.NV_VGLIST not in nresult or
2510
              vg_name not in nresult[constants.NV_VGLIST])
2511
      _ErrorIf(test, constants.CV_ENODELVM, node,
2512
               "node didn't return data for the volume group '%s'"
2513
               " - it is either missing or broken", vg_name)
2514
      if not test:
2515
        try:
2516
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2517
        except (ValueError, TypeError):
2518
          _ErrorIf(True, constants.CV_ENODERPC, node,
2519
                   "node returned invalid LVM info, check LVM status")
2520

    
2521
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2522
    """Gets per-disk status information for all instances.
2523

2524
    @type nodelist: list of strings
2525
    @param nodelist: Node names
2526
    @type node_image: dict of (name, L{objects.Node})
2527
    @param node_image: Node objects
2528
    @type instanceinfo: dict of (name, L{objects.Instance})
2529
    @param instanceinfo: Instance objects
2530
    @rtype: {instance: {node: [(success, payload)]}}
2531
    @return: a dictionary of per-instance dictionaries with nodes as
2532
        keys and disk information as values; the disk information is a
2533
        list of tuples (success, payload)
2534

2535
    """
2536
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2537

    
2538
    node_disks = {}
2539
    node_disks_devonly = {}
2540
    diskless_instances = set()
2541
    diskless = constants.DT_DISKLESS
2542

    
2543
    for nname in nodelist:
2544
      node_instances = list(itertools.chain(node_image[nname].pinst,
2545
                                            node_image[nname].sinst))
2546
      diskless_instances.update(inst for inst in node_instances
2547
                                if instanceinfo[inst].disk_template == diskless)
2548
      disks = [(inst, disk)
2549
               for inst in node_instances
2550
               for disk in instanceinfo[inst].disks]
2551

    
2552
      if not disks:
2553
        # No need to collect data
2554
        continue
2555

    
2556
      node_disks[nname] = disks
2557

    
2558
      # Creating copies as SetDiskID below will modify the objects and that can
2559
      # lead to incorrect data returned from nodes
2560
      devonly = [dev.Copy() for (_, dev) in disks]
2561

    
2562
      for dev in devonly:
2563
        self.cfg.SetDiskID(dev, nname)
2564

    
2565
      node_disks_devonly[nname] = devonly
2566

    
2567
    assert len(node_disks) == len(node_disks_devonly)
2568

    
2569
    # Collect data from all nodes with disks
2570
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2571
                                                          node_disks_devonly)
2572

    
2573
    assert len(result) == len(node_disks)
2574

    
2575
    instdisk = {}
2576

    
2577
    for (nname, nres) in result.items():
2578
      disks = node_disks[nname]
2579

    
2580
      if nres.offline:
2581
        # No data from this node
2582
        data = len(disks) * [(False, "node offline")]
2583
      else:
2584
        msg = nres.fail_msg
2585
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2586
                 "while getting disk information: %s", msg)
2587
        if msg:
2588
          # No data from this node
2589
          data = len(disks) * [(False, msg)]
2590
        else:
2591
          data = []
2592
          for idx, i in enumerate(nres.payload):
2593
            if isinstance(i, (tuple, list)) and len(i) == 2:
2594
              data.append(i)
2595
            else:
2596
              logging.warning("Invalid result from node %s, entry %d: %s",
2597
                              nname, idx, i)
2598
              data.append((False, "Invalid result from the remote node"))
2599

    
2600
      for ((inst, _), status) in zip(disks, data):
2601
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2602

    
2603
    # Add empty entries for diskless instances.
2604
    for inst in diskless_instances:
2605
      assert inst not in instdisk
2606
      instdisk[inst] = {}
2607

    
2608
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2609
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2610
                      compat.all(isinstance(s, (tuple, list)) and
2611
                                 len(s) == 2 for s in statuses)
2612
                      for inst, nnames in instdisk.items()
2613
                      for nname, statuses in nnames.items())
2614
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2615

    
2616
    return instdisk
2617
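  # Shape of the mapping returned by _CollectDiskInfo above, with
  # hypothetical names; each instance maps its nodes to one (success,
  # payload) pair per disk, and diskless instances get an empty dict:
  #
  #   instdisk == {
  #     "web1.example.com": {"node1.example.com": [(True, status0),
  #                                                (True, status1)]},
  #     "diskless1.example.com": {},
  #   }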

    
2618
  @staticmethod
2619
  def _SshNodeSelector(group_uuid, all_nodes):
2620
    """Create endless iterators for all potential SSH check hosts.
2621

2622
    """
2623
    nodes = [node for node in all_nodes
2624
             if (node.group != group_uuid and
2625
                 not node.offline)]
2626
    keyfunc = operator.attrgetter("group")
2627

    
2628
    return map(itertools.cycle,
2629
               [sorted(map(operator.attrgetter("name"), names))
2630
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2631
                                                  keyfunc)])
2632

    
2633
  @classmethod
2634
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2635
    """Choose which nodes should talk to which other nodes.
2636

2637
    We will make nodes contact all nodes in their group, and one node from
2638
    every other group.
2639

2640
    @warning: This algorithm has a known issue if one node group is much
2641
      smaller than others (e.g. just one node). In such a case all other
2642
      nodes will talk to the single node.
2643

2644
    """
2645
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2646
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2647

    
2648
    return (online_nodes,
2649
            dict((name, sorted([i.next() for i in sel]))
2650
                 for name in online_nodes))
2651
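  # A compact illustration of _SelectSshCheckNodes above (group and node
  # names are invented): when verifying a group containing node1 and node2
  # while another group contains only node3, the result is roughly
  #   (["node1", "node2"], {"node1": ["node3"], "node2": ["node3"]})
  # i.e. every online node of the verified group is additionally pointed at
  # one node from each other group, on top of the intra-group SSH checks
  # described in the docstring.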

    
2652
  def BuildHooksEnv(self):
2653
    """Build hooks env.
2654

2655
    Cluster-Verify hooks just ran in the post phase and their failure makes
2656
    the output be logged in the verify output and the verification to fail.
2657

2658
    """
2659
    env = {
2660
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2661
      }
2662

    
2663
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2664
               for node in self.my_node_info.values())
2665

    
2666
    return env
2667

    
2668
  def BuildHooksNodes(self):
2669
    """Build hooks nodes.
2670

2671
    """
2672
    return ([], self.my_node_names)
2673

    
2674
  def Exec(self, feedback_fn):
2675
    """Verify integrity of the node group, performing various test on nodes.
2676

2677
    """
2678
    # This method has too many local variables. pylint: disable=R0914
2679
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2680

    
2681
    if not self.my_node_names:
2682
      # empty node group
2683
      feedback_fn("* Empty node group, skipping verification")
2684
      return True
2685

    
2686
    self.bad = False
2687
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2688
    verbose = self.op.verbose
2689
    self._feedback_fn = feedback_fn
2690

    
2691
    vg_name = self.cfg.GetVGName()
2692
    drbd_helper = self.cfg.GetDRBDHelper()
2693
    cluster = self.cfg.GetClusterInfo()
2694
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2695
    hypervisors = cluster.enabled_hypervisors
2696
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2697

    
2698
    i_non_redundant = [] # Non redundant instances
2699
    i_non_a_balanced = [] # Non auto-balanced instances
2700
    i_offline = 0 # Count of offline instances
2701
    n_offline = 0 # Count of offline nodes
2702
    n_drained = 0 # Count of nodes being drained
2703
    node_vol_should = {}
2704

    
2705
    # FIXME: verify OS list
2706

    
2707
    # File verification
2708
    filemap = _ComputeAncillaryFiles(cluster, False)
2709

    
2710
    # do local checksums
2711
    master_node = self.master_node = self.cfg.GetMasterNode()
2712
    master_ip = self.cfg.GetMasterIP()
2713

    
2714
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2715

    
2716
    user_scripts = []
2717
    if self.cfg.GetUseExternalMipScript():
2718
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2719

    
2720
    node_verify_param = {
2721
      constants.NV_FILELIST:
2722
        utils.UniqueSequence(filename
2723
                             for files in filemap
2724
                             for filename in files),
2725
      constants.NV_NODELIST:
2726
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2727
                                  self.all_node_info.values()),
2728
      constants.NV_HYPERVISOR: hypervisors,
2729
      constants.NV_HVPARAMS:
2730
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2731
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2732
                                 for node in node_data_list
2733
                                 if not node.offline],
2734
      constants.NV_INSTANCELIST: hypervisors,
2735
      constants.NV_VERSION: None,
2736
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2737
      constants.NV_NODESETUP: None,
2738
      constants.NV_TIME: None,
2739
      constants.NV_MASTERIP: (master_node, master_ip),
2740
      constants.NV_OSLIST: None,
2741
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2742
      constants.NV_USERSCRIPTS: user_scripts,
2743
      }
2744

    
2745
    if vg_name is not None:
2746
      node_verify_param[constants.NV_VGLIST] = None
2747
      node_verify_param[constants.NV_LVLIST] = vg_name
2748
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2749
      node_verify_param[constants.NV_DRBDLIST] = None
2750

    
2751
    if drbd_helper:
2752
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2753

    
2754
    # bridge checks
2755
    # FIXME: this needs to be changed per node-group, not cluster-wide
2756
    bridges = set()
2757
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2758
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2759
      bridges.add(default_nicpp[constants.NIC_LINK])
2760
    for instance in self.my_inst_info.values():
2761
      for nic in instance.nics:
2762
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2763
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2764
          bridges.add(full_nic[constants.NIC_LINK])
2765

    
2766
    if bridges:
2767
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2768

    
2769
    # Build our expected cluster state
2770
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2771
                                                 name=node.name,
2772
                                                 vm_capable=node.vm_capable))
2773
                      for node in node_data_list)
2774

    
2775
    # Gather OOB paths
2776
    oob_paths = []
2777
    for node in self.all_node_info.values():
2778
      path = _SupportsOob(self.cfg, node)
2779
      if path and path not in oob_paths:
2780
        oob_paths.append(path)
2781

    
2782
    if oob_paths:
2783
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2784

    
2785
    for instance in self.my_inst_names:
2786
      inst_config = self.my_inst_info[instance]
2787

    
2788
      for nname in inst_config.all_nodes:
2789
        if nname not in node_image:
2790
          gnode = self.NodeImage(name=nname)
2791
          gnode.ghost = (nname not in self.all_node_info)
2792
          node_image[nname] = gnode
2793

    
2794
      inst_config.MapLVsByNode(node_vol_should)
2795

    
2796
      pnode = inst_config.primary_node
2797
      node_image[pnode].pinst.append(instance)
2798

    
2799
      for snode in inst_config.secondary_nodes:
2800
        nimg = node_image[snode]
2801
        nimg.sinst.append(instance)
2802
        if pnode not in nimg.sbp:
2803
          nimg.sbp[pnode] = []
2804
        nimg.sbp[pnode].append(instance)
2805

    
2806
    # At this point, we have the in-memory data structures complete,
2807
    # except for the runtime information, which we'll gather next
2808

    
2809
    # Due to the way our RPC system works, exact response times cannot be
2810
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2811
    # time before and after executing the request, we can at least have a time
2812
    # window.
2813
    nvinfo_starttime = time.time()
2814
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2815
                                           node_verify_param,
2816
                                           self.cfg.GetClusterName())
2817
    nvinfo_endtime = time.time()
2818

    
2819
    if self.extra_lv_nodes and vg_name is not None:
2820
      extra_lv_nvinfo = \
2821
          self.rpc.call_node_verify(self.extra_lv_nodes,
2822
                                    {constants.NV_LVLIST: vg_name},
2823
                                    self.cfg.GetClusterName())
2824
    else:
2825
      extra_lv_nvinfo = {}
2826

    
2827
    all_drbd_map = self.cfg.ComputeDRBDMap()
2828

    
2829
    feedback_fn("* Gathering disk information (%s nodes)" %
2830
                len(self.my_node_names))
2831
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2832
                                     self.my_inst_info)
2833

    
2834
    feedback_fn("* Verifying configuration file consistency")
2835

    
2836
    # If not all nodes are being checked, we need to make sure the master node
2837
    # and a non-checked vm_capable node are in the list.
2838
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2839
    if absent_nodes:
2840
      vf_nvinfo = all_nvinfo.copy()
2841
      vf_node_info = list(self.my_node_info.values())
2842
      additional_nodes = []
2843
      if master_node not in self.my_node_info:
2844
        additional_nodes.append(master_node)
2845
        vf_node_info.append(self.all_node_info[master_node])
2846
      # Add the first vm_capable node we find which is not included
2847
      for node in absent_nodes:
2848
        nodeinfo = self.all_node_info[node]
2849
        if nodeinfo.vm_capable and not nodeinfo.offline:
2850
          additional_nodes.append(node)
2851
          vf_node_info.append(self.all_node_info[node])
2852
          break
2853
      key = constants.NV_FILELIST
2854
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2855
                                                 {key: node_verify_param[key]},
2856
                                                 self.cfg.GetClusterName()))
2857
    else:
2858
      vf_nvinfo = all_nvinfo
2859
      vf_node_info = self.my_node_info.values()
2860

    
2861
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2862

    
2863
    feedback_fn("* Verifying node status")
2864

    
2865
    refos_img = None
2866

    
2867
    for node_i in node_data_list:
2868
      node = node_i.name
2869
      nimg = node_image[node]
2870

    
2871
      if node_i.offline:
2872
        if verbose:
2873
          feedback_fn("* Skipping offline node %s" % (node,))
2874
        n_offline += 1
2875
        continue
2876

    
2877
      if node == master_node:
2878
        ntype = "master"
2879
      elif node_i.master_candidate:
2880
        ntype = "master candidate"
2881
      elif node_i.drained:
2882
        ntype = "drained"
2883
        n_drained += 1
2884
      else:
2885
        ntype = "regular"
2886
      if verbose:
2887
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2888

    
2889
      msg = all_nvinfo[node].fail_msg
2890
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2891
               msg)
2892
      if msg:
2893
        nimg.rpc_fail = True
2894
        continue
2895

    
2896
      nresult = all_nvinfo[node].payload
2897

    
2898
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2899
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2900
      self._VerifyNodeNetwork(node_i, nresult)
2901
      self._VerifyNodeUserScripts(node_i, nresult)
2902
      self._VerifyOob(node_i, nresult)
2903

    
2904
      if nimg.vm_capable:
2905
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2906
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2907
                             all_drbd_map)
2908

    
2909
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2910
        self._UpdateNodeInstances(node_i, nresult, nimg)
2911
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2912
        self._UpdateNodeOS(node_i, nresult, nimg)
2913

    
2914
        if not nimg.os_fail:
2915
          if refos_img is None:
2916
            refos_img = nimg
2917
          self._VerifyNodeOS(node_i, nimg, refos_img)
2918
        self._VerifyNodeBridges(node_i, nresult, bridges)
2919

    
2920
        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
2923
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2924

    
2925
        for inst in non_primary_inst:
2926
          # FIXME: investigate best way to handle offline insts
2927
          if inst.admin_state == constants.ADMINST_OFFLINE:
2928
            if verbose:
2929
              feedback_fn("* Skipping offline instance %s" % inst.name)
2930
            i_offline += 1
2931
            continue
2932
          test = inst in self.all_inst_info
2933
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2934
                   "instance should not run on node %s", node_i.name)
2935
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2936
                   "node is running unknown instance %s", inst)
2937

    
2938
    for node, result in extra_lv_nvinfo.items():
2939
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2940
                              node_image[node], vg_name)
2941

    
2942
    feedback_fn("* Verifying instance status")
2943
    for instance in self.my_inst_names:
2944
      if verbose:
2945
        feedback_fn("* Verifying instance %s" % instance)
2946
      inst_config = self.my_inst_info[instance]
2947
      self._VerifyInstance(instance, inst_config, node_image,
2948
                           instdisk[instance])
2949
      inst_nodes_offline = []
2950

    
2951
      pnode = inst_config.primary_node
2952
      pnode_img = node_image[pnode]
2953
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2954
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2955
               " primary node failed", instance)
2956

    
2957
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2958
               pnode_img.offline,
2959
               constants.CV_EINSTANCEBADNODE, instance,
2960
               "instance is marked as running and lives on offline node %s",
2961
               inst_config.primary_node)
2962

    
2963
      # If the instance is non-redundant we cannot survive losing its primary
2964
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary, so that situation is not well
      # supported either.
2967
      # FIXME: does not support file-backed instances
2968
      if not inst_config.secondary_nodes:
2969
        i_non_redundant.append(instance)
2970

    
2971
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2972
               constants.CV_EINSTANCELAYOUT,
2973
               instance, "instance has multiple secondary nodes: %s",
2974
               utils.CommaJoin(inst_config.secondary_nodes),
2975
               code=self.ETYPE_WARNING)
2976

    
2977
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2978
        pnode = inst_config.primary_node
2979
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2980
        instance_groups = {}
2981

    
2982
        for node in instance_nodes:
2983
          instance_groups.setdefault(self.all_node_info[node].group,
2984
                                     []).append(node)
2985

    
2986
        pretty_list = [
2987
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2988
          # Sort so that we always list the primary node first.
2989
          for group, nodes in sorted(instance_groups.items(),
2990
                                     key=lambda (_, nodes): pnode in nodes,
2991
                                     reverse=True)]
2992

    
2993
        self._ErrorIf(len(instance_groups) > 1,
2994
                      constants.CV_EINSTANCESPLITGROUPS,
2995
                      instance, "instance has primary and secondary nodes in"
2996
                      " different groups: %s", utils.CommaJoin(pretty_list),
2997
                      code=self.ETYPE_WARNING)
2998

    
2999
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3000
        i_non_a_balanced.append(instance)
3001

    
3002
      for snode in inst_config.secondary_nodes:
3003
        s_img = node_image[snode]
3004
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3005
                 snode, "instance %s, connection to secondary node failed",
3006
                 instance)
3007

    
3008
        if s_img.offline:
3009
          inst_nodes_offline.append(snode)
3010

    
3011
      # warn that the instance lives on offline nodes
3012
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3013
               "instance has offline secondary node(s) %s",
3014
               utils.CommaJoin(inst_nodes_offline))
3015
      # ... or ghost/non-vm_capable nodes
3016
      for node in inst_config.all_nodes:
3017
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3018
                 instance, "instance lives on ghost node %s", node)
3019
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3020
                 instance, "instance lives on non-vm_capable node %s", node)
3021

    
3022
    feedback_fn("* Verifying orphan volumes")
3023
    reserved = utils.FieldSet(*cluster.reserved_lvs)
3024

    
3025
    # We will get spurious "unknown volume" warnings if any node of this group
3026
    # is secondary for an instance whose primary is in another group. To avoid
3027
    # them, we find these instances and add their volumes to node_vol_should.
3028
    for inst in self.all_inst_info.values():
3029
      for secondary in inst.secondary_nodes:
3030
        if (secondary in self.my_node_info
3031
            and inst.name not in self.my_inst_info):
3032
          inst.MapLVsByNode(node_vol_should)
3033
          break
3034

    
3035
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3036

    
3037
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3038
      feedback_fn("* Verifying N+1 Memory redundancy")
3039
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3040

    
3041
    feedback_fn("* Other Notes")
3042
    if i_non_redundant:
3043
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3044
                  % len(i_non_redundant))
3045

    
3046
    if i_non_a_balanced:
3047
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3048
                  % len(i_non_a_balanced))
3049

    
3050
    if i_offline:
3051
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3052

    
3053
    if n_offline:
3054
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3055

    
3056
    if n_drained:
3057
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3058

    
3059
    return not self.bad
3060

    
3061
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3062
    """Analyze the post-hooks' result
3063

3064
    This method analyses the hook result, handles it, and sends some
3065
    nicely-formatted feedback back to the user.
3066

3067
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3068
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3069
    @param hooks_results: the results of the multi-node hooks rpc call
3070
    @param feedback_fn: function used to send feedback back to the caller
3071
    @param lu_result: previous Exec result
3072
    @return: the new Exec result, based on the previous result
3073
        and hook results
3074

3075
    """
3076
    # We only really run POST phase hooks, only for non-empty groups,
3077
    # and are only interested in their results
3078
    if not self.my_node_names:
3079
      # empty node group
3080
      pass
3081
    elif phase == constants.HOOKS_PHASE_POST:
3082
      # Used to change hooks' output to proper indentation
3083
      feedback_fn("* Hooks Results")
3084
      assert hooks_results, "invalid result from hooks"
3085

    
3086
      for node_name in hooks_results:
3087
        res = hooks_results[node_name]
3088
        msg = res.fail_msg
3089
        test = msg and not res.offline
3090
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3091
                      "Communication failure in hooks execution: %s", msg)
3092
        if res.offline or msg:
3093
          # No need to investigate payload if node is offline or gave
3094
          # an error.
3095
          continue
3096
        for script, hkr, output in res.payload:
3097
          test = hkr == constants.HKR_FAIL
3098
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3099
                        "Script %s failed, output:", script)
3100
          if test:
3101
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3102
            feedback_fn("%s" % output)
3103
            lu_result = False
3104

    
3105
    return lu_result
3106

    
3107

    
3108
class LUClusterVerifyDisks(NoHooksLU):
3109
  """Verifies the cluster disks status.
3110

3111
  """
3112
  REQ_BGL = False
3113

    
3114
  def ExpandNames(self):
3115
    self.share_locks = _ShareAll()
3116
    self.needed_locks = {
3117
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3118
      }
3119

    
3120
  def Exec(self, feedback_fn):
3121
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3122

    
3123
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3124
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3125
                           for group in group_names])
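

# Illustrative sketch, not used by any LU: the jobs value handed to
# ResultWithJobs is a list of jobs, each job itself being a list of opcodes.
# The helper below mirrors the Exec method above and shows how additional
# named results could be attached; the "summary" keyword is a hypothetical
# example, not a field consumed anywhere else.
def _ExampleGroupVerifyJobs(group_names):
  """Build one single-opcode job per node group (illustration only).

  """
  jobs = [[opcodes.OpGroupVerifyDisks(group_name=group)]
          for group in group_names]
  return ResultWithJobs(jobs, summary="one disk verification job per group")

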
class LUGroupVerifyDisks(NoHooksLU):
3129
  """Verifies the status of all disks in a node group.
3130

3131
  """
3132
  REQ_BGL = False
3133

    
3134
  def ExpandNames(self):
3135
    # Raises errors.OpPrereqError on its own if group can't be found
3136
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3137

    
3138
    self.share_locks = _ShareAll()
3139
    self.needed_locks = {
3140
      locking.LEVEL_INSTANCE: [],
3141
      locking.LEVEL_NODEGROUP: [],
3142
      locking.LEVEL_NODE: [],
3143
      }
3144

    
3145
  def DeclareLocks(self, level):
3146
    if level == locking.LEVEL_INSTANCE:
3147
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3148

    
3149
      # Lock instances optimistically, needs verification once node and group
3150
      # locks have been acquired
3151
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3152
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3153

    
3154
    elif level == locking.LEVEL_NODEGROUP:
3155
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3156

    
3157
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3158
        set([self.group_uuid] +
3159
            # Lock all groups used by instances optimistically; this requires
3160
            # going via the node before it's locked, requiring verification
3161
            # later on
3162
            [group_uuid
3163
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3164
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3165

    
3166
    elif level == locking.LEVEL_NODE:
3167
      # This will only lock the nodes in the group to be verified which contain
3168
      # actual instances
3169
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3170
      self._LockInstancesNodes()
3171

    
3172
      # Lock all nodes in group to be verified
3173
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3174
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3175
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3176

    
3177
  def CheckPrereq(self):
3178
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3179
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3180
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3181

    
3182
    assert self.group_uuid in owned_groups
3183

    
3184
    # Check if locked instances are still correct
3185
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3186

    
3187
    # Get instance information
3188
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3189

    
3190
    # Check if node groups for locked instances are still correct
3191
    for (instance_name, inst) in self.instances.items():
3192
      assert owned_nodes.issuperset(inst.all_nodes), \
3193
        "Instance %s's nodes changed while we kept the lock" % instance_name
3194

    
3195
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3196
                                             owned_groups)
3197

    
3198
      assert self.group_uuid in inst_groups, \
3199
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3200

    
3201
  def Exec(self, feedback_fn):
3202
    """Verify integrity of cluster disks.
3203

3204
    @rtype: tuple of three items
3205
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)
3208

3209
    """
3210
    res_nodes = {}
3211
    res_instances = set()
3212
    res_missing = {}
3213

    
3214
    nv_dict = _MapInstanceDisksToNodes([inst
3215
            for inst in self.instances.values()
3216
            if inst.admin_state == constants.ADMINST_UP])
3217

    
3218
    if nv_dict:
3219
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3220
                             set(self.cfg.GetVmCapableNodeList()))
3221

    
3222
      node_lvs = self.rpc.call_lv_list(nodes, [])
3223

    
3224
      for (node, node_res) in node_lvs.items():
3225
        if node_res.offline:
3226
          continue
3227

    
3228
        msg = node_res.fail_msg
3229
        if msg:
3230
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3231
          res_nodes[node] = msg
3232
          continue
3233

    
3234
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3235
          inst = nv_dict.pop((node, lv_name), None)
3236
          if not (lv_online or inst is None):
3237
            res_instances.add(inst)
3238

    
3239
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3240
      # better
3241
      for key, inst in nv_dict.iteritems():
3242
        res_missing.setdefault(inst, []).append(list(key))
3243

    
3244
    return (res_nodes, list(res_instances), res_missing)
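

# Illustrative sketch, not used by any LU, of the map-and-pop technique in the
# Exec method above: every expected (node, lv_name) pair is mapped to its
# owning instance, reported volumes are popped as they are seen, and whatever
# is left over must be missing. All names below are made up for the example.
def _ExampleFindMissingLvs(expected, reported):
  """Map leftover (node, lv_name) pairs to their instances (sketch only).

  @type expected: dict mapping (node, lv_name) tuples to an instance name
  @type reported: iterable of (node, lv_name) tuples that were actually found
  @rtype: dict mapping instance names to lists of missing (node, lv_name)

  """
  remaining = dict(expected)
  for key in reported:
    remaining.pop(key, None)
  missing = {}
  for key, inst in remaining.items():
    missing.setdefault(inst, []).append(key)
  return missing

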
class LUClusterRepairDiskSizes(NoHooksLU):
3248
  """Verifies the cluster disks sizes.
3249

3250
  """
3251
  REQ_BGL = False
3252

    
3253
  def ExpandNames(self):
3254
    if self.op.instances:
3255
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3256
      self.needed_locks = {
3257
        locking.LEVEL_NODE_RES: [],
3258
        locking.LEVEL_INSTANCE: self.wanted_names,
3259
        }
3260
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3261
    else:
3262
      self.wanted_names = None
3263
      self.needed_locks = {
3264
        locking.LEVEL_NODE_RES: locking.ALL_SET,
3265
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3266
        }
3267
    self.share_locks = {
3268
      locking.LEVEL_NODE_RES: 1,
3269
      locking.LEVEL_INSTANCE: 0,
3270
      }
3271

    
3272
  def DeclareLocks(self, level):
3273
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3274
      self._LockInstancesNodes(primary_only=True, level=level)
3275

    
3276
  def CheckPrereq(self):
3277
    """Check prerequisites.
3278

3279
    This only checks the optional instance list against the existing names.
3280

3281
    """
3282
    if self.wanted_names is None:
3283
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3284

    
3285
    self.wanted_instances = \
3286
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3287

    
3288
  def _EnsureChildSizes(self, disk):
3289
    """Ensure children of the disk have the needed disk size.
3290

3291
    This is valid mainly for DRBD8 and fixes an issue where the
    children have a smaller disk size than the parent.
3293

3294
    @param disk: an L{ganeti.objects.Disk} object
3295

3296
    """
3297
    if disk.dev_type == constants.LD_DRBD8:
3298
      assert disk.children, "Empty children for DRBD8?"
3299
      fchild = disk.children[0]
3300
      mismatch = fchild.size < disk.size
3301
      if mismatch:
3302
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3303
                     fchild.size, disk.size)
3304
        fchild.size = disk.size
3305

    
3306
      # and we recurse on this child only, not on the metadev
3307
      return self._EnsureChildSizes(fchild) or mismatch
3308
    else:
3309
      return False
3310

    
3311
  def Exec(self, feedback_fn):
3312
    """Verify the size of cluster disks.
3313

3314
    """
3315
    # TODO: check child disks too
3316
    # TODO: check differences in size between primary/secondary nodes
3317
    per_node_disks = {}
3318
    for instance in self.wanted_instances:
3319
      pnode = instance.primary_node
3320
      if pnode not in per_node_disks:
3321
        per_node_disks[pnode] = []
3322
      for idx, disk in enumerate(instance.disks):
3323
        per_node_disks[pnode].append((instance, idx, disk))
3324

    
3325
    assert not (frozenset(per_node_disks.keys()) -
3326
                self.owned_locks(locking.LEVEL_NODE_RES)), \
3327
      "Not owning correct locks"
3328
    assert not self.owned_locks(locking.LEVEL_NODE)
3329

    
3330
    changed = []
3331
    for node, dskl in per_node_disks.items():
3332
      newl = [v[2].Copy() for v in dskl]
3333
      for dsk in newl:
3334
        self.cfg.SetDiskID(dsk, node)
3335
      result = self.rpc.call_blockdev_getsize(node, newl)
3336
      if result.fail_msg:
3337
        self.LogWarning("Failure in blockdev_getsize call to node"
3338
                        " %s, ignoring", node)
3339
        continue
3340
      if len(result.payload) != len(dskl):
3341
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3342
                        " result.payload=%s", node, len(dskl), result.payload)
3343
        self.LogWarning("Invalid result from node %s, ignoring node results",
3344
                        node)
3345
        continue
3346
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3347
        if size is None:
3348
          self.LogWarning("Disk %d of instance %s did not return size"
3349
                          " information, ignoring", idx, instance.name)
3350
          continue
3351
        if not isinstance(size, (int, long)):
3352
          self.LogWarning("Disk %d of instance %s did not return valid"
3353
                          " size information, ignoring", idx, instance.name)
3354
          continue
3355
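        # the node reports the size in bytes; shift by 20 bits to compare in
        # MiB, the unit in which disk.size is recorded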
        size = size >> 20
3356
        if size != disk.size:
3357
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3358
                       " correcting: recorded %d, actual %d", idx,
3359
                       instance.name, disk.size, size)
3360
          disk.size = size
3361
          self.cfg.Update(instance, feedback_fn)
3362
          changed.append((instance.name, idx, size))
3363
        if self._EnsureChildSizes(disk):
3364
          self.cfg.Update(instance, feedback_fn)
3365
          changed.append((instance.name, idx, disk.size))
3366
    return changed
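

# Illustrative sketch, not used by any LU: the heart of the repair above is
# comparing the size reported by the node (in bytes, judging by the >> 20
# conversion) with the size recorded in the configuration (in MiB) and
# adopting the reported value on mismatch. The helper below shows only that
# arithmetic, detached from the RPC and configuration layers.
def _ExampleReconcileDiskSize(recorded_mib, reported_bytes):
  """Return the corrected size in MiB, or None if no change is needed.

  """
  if not isinstance(reported_bytes, (int, long)):
    # mirrors the "no valid size information" case above
    return None
  reported_mib = reported_bytes >> 20
  if reported_mib != recorded_mib:
    return reported_mib
  return None

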
class LUClusterRename(LogicalUnit):
3370
  """Rename the cluster.
3371

3372
  """
3373
  HPATH = "cluster-rename"
3374
  HTYPE = constants.HTYPE_CLUSTER
3375

    
3376
  def BuildHooksEnv(self):
3377
    """Build hooks env.
3378

3379
    """
3380
    return {
3381
      "OP_TARGET": self.cfg.GetClusterName(),
3382
      "NEW_NAME": self.op.name,
3383
      }
3384

    
3385
  def BuildHooksNodes(self):
3386
    """Build hooks nodes.
3387

3388
    """
3389
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3390

    
3391
  def CheckPrereq(self):
3392
    """Verify that the passed name is a valid one.
3393

3394
    """
3395
    hostname = netutils.GetHostname(name=self.op.name,
3396
                                    family=self.cfg.GetPrimaryIPFamily())
3397

    
3398
    new_name = hostname.name
3399
    self.ip = new_ip = hostname.ip
3400
    old_name = self.cfg.GetClusterName()
3401
    old_ip = self.cfg.GetMasterIP()
3402
    if new_name == old_name and new_ip == old_ip:
3403
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3404
                                 " cluster has changed",
3405
                                 errors.ECODE_INVAL)
3406
    if new_ip != old_ip:
3407
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3408
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3409
                                   " reachable on the network" %
3410
                                   new_ip, errors.ECODE_NOTUNIQUE)
3411

    
3412
    self.op.name = new_name
3413

    
3414
  def Exec(self, feedback_fn):
3415
    """Rename the cluster.
3416

3417
    """
3418
    clustername = self.op.name
3419
    new_ip = self.ip
3420

    
3421
    # shutdown the master IP
3422
    master_params = self.cfg.GetMasterNetworkParameters()
3423
    ems = self.cfg.GetUseExternalMipScript()
3424
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3425
                                                     master_params, ems)
3426
    result.Raise("Could not disable the master role")
3427

    
3428
    try:
3429
      cluster = self.cfg.GetClusterInfo()
3430
      cluster.cluster_name = clustername
3431
      cluster.master_ip = new_ip
3432
      self.cfg.Update(cluster, feedback_fn)
3433

    
3434
      # update the known hosts file
3435
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3436
      node_list = self.cfg.GetOnlineNodeList()
3437
      try:
3438
        node_list.remove(master_params.name)
3439
      except ValueError:
3440
        pass
3441
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3442
    finally:
3443
      master_params.ip = new_ip
3444
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3445
                                                     master_params, ems)
3446
      msg = result.fail_msg
3447
      if msg:
3448
        self.LogWarning("Could not re-enable the master role on"
3449
                        " the master, please restart manually: %s", msg)
3450

    
3451
    return clustername
3452

    
3453

    
3454
def _ValidateNetmask(cfg, netmask):
3455
  """Checks if a netmask is valid.
3456

3457
  @type cfg: L{config.ConfigWriter}
3458
  @param cfg: The cluster configuration
3459
  @type netmask: int
3460
  @param netmask: the netmask to be verified
3461
  @raise errors.OpPrereqError: if the validation fails
3462

3463
  """
3464
  ip_family = cfg.GetPrimaryIPFamily()
3465
  try:
3466
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3467
  except errors.ProgrammerError:
3468
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
3469
                               ip_family)
3470
  if not ipcls.ValidateNetmask(netmask):
3471
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3472
                                (netmask))
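

# Hedged usage sketch for _ValidateNetmask, not called anywhere: netmasks are
# plain prefix lengths, so on an IPv4 cluster 24 should pass while 33 should
# raise errors.OpPrereqError. The cfg argument is assumed to be the usual
# config.ConfigWriter instance described in the docstring above.
def _ExampleAcceptedNetmasks(cfg, candidates=(8, 24, 33)):
  """Return the subset of C{candidates} accepted by _ValidateNetmask.

  """
  accepted = []
  for netmask in candidates:
    try:
      _ValidateNetmask(cfg, netmask)
    except errors.OpPrereqError:
      continue
    accepted.append(netmask)
  return accepted

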
class LUClusterSetParams(LogicalUnit):
3476
  """Change the parameters of the cluster.
3477

3478
  """
3479
  HPATH = "cluster-modify"
3480
  HTYPE = constants.HTYPE_CLUSTER
3481
  REQ_BGL = False
3482

    
3483
  def CheckArguments(self):
3484
    """Check parameters
3485

3486
    """
3487
    if self.op.uid_pool:
3488
      uidpool.CheckUidPool(self.op.uid_pool)
3489

    
3490
    if self.op.add_uids:
3491
      uidpool.CheckUidPool(self.op.add_uids)
3492

    
3493
    if self.op.remove_uids:
3494
      uidpool.CheckUidPool(self.op.remove_uids)
3495

    
3496
    if self.op.master_netmask is not None:
3497
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3498

    
3499
  def ExpandNames(self):
3500
    # FIXME: in the future maybe other cluster params won't require checking on
3501
    # all nodes to be modified.
3502
    self.needed_locks = {
3503
      locking.LEVEL_NODE: locking.ALL_SET,
3504
    }
3505
    self.share_locks[locking.LEVEL_NODE] = 1
3506

    
3507
  def BuildHooksEnv(self):
3508
    """Build hooks env.
3509

3510
    """
3511
    return {
3512
      "OP_TARGET": self.cfg.GetClusterName(),
3513
      "NEW_VG_NAME": self.op.vg_name,
3514
      }
3515

    
3516
  def BuildHooksNodes(self):
3517
    """Build hooks nodes.
3518

3519
    """
3520
    mn = self.cfg.GetMasterNode()
3521
    return ([mn], [mn])
3522

    
3523
  def CheckPrereq(self):
3524
    """Check prerequisites.
3525

3526
    This checks whether the given params don't conflict and
3527
    if the given volume group is valid.
3528

3529
    """
3530
    if self.op.vg_name is not None and not self.op.vg_name:
3531
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3532
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3533
                                   " instances exist", errors.ECODE_INVAL)
3534

    
3535
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3536
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3537
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3538
                                   " drbd-based instances exist",
3539
                                   errors.ECODE_INVAL)
3540

    
3541
    node_list = self.owned_locks(locking.LEVEL_NODE)
3542

    
3543
    # if vg_name not None, checks given volume group on all nodes
3544
    if self.op.vg_name:
3545
      vglist = self.rpc.call_vg_list(node_list)
3546
      for node in node_list:
3547
        msg = vglist[node].fail_msg
3548
        if msg:
3549
          # ignoring down node
3550
          self.LogWarning("Error while gathering data on node %s"
3551
                          " (ignoring node): %s", node, msg)
3552
          continue
3553
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3554
                                              self.op.vg_name,
3555
                                              constants.MIN_VG_SIZE)
3556
        if vgstatus:
3557
          raise errors.OpPrereqError("Error on node '%s': %s" %
3558
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3559

    
3560
    if self.op.drbd_helper:
3561
      # checks given drbd helper on all nodes
3562
      helpers = self.rpc.call_drbd_helper(node_list)
3563
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3564
        if ninfo.offline:
3565
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3566
          continue
3567
        msg = helpers[node].fail_msg
3568
        if msg:
3569
          raise errors.OpPrereqError("Error checking drbd helper on node"
3570
                                     " '%s': %s" % (node, msg),
3571
                                     errors.ECODE_ENVIRON)
3572
        node_helper = helpers[node].payload
3573
        if node_helper != self.op.drbd_helper:
3574
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3575
                                     (node, node_helper), errors.ECODE_ENVIRON)
3576

    
3577
    self.cluster = cluster = self.cfg.GetClusterInfo()
3578
    # validate params changes
3579
    if self.op.beparams:
3580
      objects.UpgradeBeParams(self.op.beparams)
3581
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3582
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3583

    
3584
    if self.op.ndparams:
3585
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3586
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3587

    
3588
      # TODO: we need a more general way to handle resetting
3589
      # cluster-level parameters to default values
3590
      if self.new_ndparams["oob_program"] == "":
3591
        self.new_ndparams["oob_program"] = \
3592
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3593

    
3594
    if self.op.nicparams:
3595
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3596
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3597
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3598
      nic_errors = []
3599

    
3600
      # check all instances for consistency
3601
      for instance in self.cfg.GetAllInstancesInfo().values():
3602
        for nic_idx, nic in enumerate(instance.nics):
3603
          params_copy = copy.deepcopy(nic.nicparams)
3604
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3605

    
3606
          # check parameter syntax
3607
          try:
3608
            objects.NIC.CheckParameterSyntax(params_filled)
3609
          except errors.ConfigurationError, err:
3610
            nic_errors.append("Instance %s, nic/%d: %s" %
3611
                              (instance.name, nic_idx, err))
3612

    
3613
          # if we're moving instances to routed, check that they have an ip
3614
          target_mode = params_filled[constants.NIC_MODE]
3615
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3616
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3617
                              " address" % (instance.name, nic_idx))
3618
      if nic_errors:
3619
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3620
                                   "\n".join(nic_errors))
3621

    
3622
    # hypervisor list/parameters
3623
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3624
    if self.op.hvparams:
3625
      for hv_name, hv_dict in self.op.hvparams.items():
3626
        if hv_name not in self.new_hvparams:
3627
          self.new_hvparams[hv_name] = hv_dict
3628
        else:
3629
          self.new_hvparams[hv_name].update(hv_dict)
3630

    
3631
    # os hypervisor parameters
3632
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3633
    if self.op.os_hvp:
3634
      for os_name, hvs in self.op.os_hvp.items():
3635
        if os_name not in self.new_os_hvp:
3636
          self.new_os_hvp[os_name] = hvs
3637
        else:
3638
          for hv_name, hv_dict in hvs.items():
3639
            if hv_name not in self.new_os_hvp[os_name]:
3640
              self.new_os_hvp[os_name][hv_name] = hv_dict
3641
            else:
3642
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3643

    
3644
    # os parameters
3645
    self.new_osp = objects.FillDict(cluster.osparams, {})
3646
    if self.op.osparams:
3647
      for os_name, osp in self.op.osparams.items():
3648
        if os_name not in self.new_osp:
3649
          self.new_osp[os_name] = {}
3650

    
3651
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3652
                                                  use_none=True)
3653

    
3654
        if not self.new_osp[os_name]:
3655
          # we removed all parameters
3656
          del self.new_osp[os_name]
3657
        else:
3658
          # check the parameter validity (remote check)
3659
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3660
                         os_name, self.new_osp[os_name])
3661

    
3662
    # changes to the hypervisor list
3663
    if self.op.enabled_hypervisors is not None:
3664
      self.hv_list = self.op.enabled_hypervisors
3665
      for hv in self.hv_list:
3666
        # if the hypervisor doesn't already exist in the cluster
3667
        # hvparams, we initialize it to empty, and then (in both
3668
        # cases) we make sure to fill the defaults, as we might not
3669
        # have a complete defaults list if the hypervisor wasn't
3670
        # enabled before
3671
        if hv not in new_hvp:
3672
          new_hvp[hv] = {}
3673
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3674
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3675
    else:
3676
      self.hv_list = cluster.enabled_hypervisors
3677

    
3678
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3679
      # either the enabled list has changed, or the parameters have, validate
3680
      for hv_name, hv_params in self.new_hvparams.items():
3681
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3682
            (self.op.enabled_hypervisors and
3683
             hv_name in self.op.enabled_hypervisors)):
3684
          # either this is a new hypervisor, or its parameters have changed
3685
          hv_class = hypervisor.GetHypervisor(hv_name)
3686
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3687
          hv_class.CheckParameterSyntax(hv_params)
3688
          _CheckHVParams(self, node_list, hv_name, hv_params)
3689

    
3690
    if self.op.os_hvp:
3691
      # no need to check any newly-enabled hypervisors, since the
3692
      # defaults have already been checked in the above code-block
3693
      for os_name, os_hvp in self.new_os_hvp.items():
3694
        for hv_name, hv_params in os_hvp.items():
3695
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3696
          # we need to fill in the new os_hvp on top of the actual hv_p
3697
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3698
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3699
          hv_class = hypervisor.GetHypervisor(hv_name)
3700
          hv_class.CheckParameterSyntax(new_osp)
3701
          _CheckHVParams(self, node_list, hv_name, new_osp)
3702

    
3703
    if self.op.default_iallocator:
3704
      alloc_script = utils.FindFile(self.op.default_iallocator,
3705
                                    constants.IALLOCATOR_SEARCH_PATH,
3706
                                    os.path.isfile)
3707
      if alloc_script is None:
3708
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3709
                                   " specified" % self.op.default_iallocator,
3710
                                   errors.ECODE_INVAL)
3711

    
3712
  def Exec(self, feedback_fn):
3713
    """Change the parameters of the cluster.
3714

3715
    """
3716
    if self.op.vg_name is not None:
3717
      new_volume = self.op.vg_name
3718
      if not new_volume:
3719
        new_volume = None
3720
      if new_volume != self.cfg.GetVGName():
3721
        self.cfg.SetVGName(new_volume)
3722
      else:
3723
        feedback_fn("Cluster LVM configuration already in desired"
3724
                    " state, not changing")
3725
    if self.op.drbd_helper is not None:
3726
      new_helper = self.op.drbd_helper
3727
      if not new_helper:
3728
        new_helper = None
3729
      if new_helper != self.cfg.GetDRBDHelper():
3730
        self.cfg.SetDRBDHelper(new_helper)
3731
      else:
3732
        feedback_fn("Cluster DRBD helper already in desired state,"
3733
                    " not changing")
3734
    if self.op.hvparams:
3735
      self.cluster.hvparams = self.new_hvparams
3736
    if self.op.os_hvp:
3737
      self.cluster.os_hvp = self.new_os_hvp
3738
    if self.op.enabled_hypervisors is not None:
3739
      self.cluster.hvparams = self.new_hvparams
3740
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3741
    if self.op.beparams:
3742
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3743
    if self.op.nicparams:
3744
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3745
    if self.op.osparams:
3746
      self.cluster.osparams = self.new_osp
3747
    if self.op.ndparams:
3748
      self.cluster.ndparams = self.new_ndparams
3749

    
3750
    if self.op.candidate_pool_size is not None:
3751
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3752
      # we need to update the pool size here, otherwise the save will fail
3753
      _AdjustCandidatePool(self, [])
3754

    
3755
    if self.op.maintain_node_health is not None:
3756
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3757
        feedback_fn("Note: CONFD was disabled at build time, node health"
3758
                    " maintenance is not useful (still enabling it)")
3759
      self.cluster.maintain_node_health = self.op.maintain_node_health
3760

    
3761
    if self.op.prealloc_wipe_disks is not None:
3762
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3763

    
3764
    if self.op.add_uids is not None:
3765
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3766

    
3767
    if self.op.remove_uids is not None:
3768
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3769

    
3770
    if self.op.uid_pool is not None:
3771
      self.cluster.uid_pool = self.op.uid_pool
3772

    
3773
    if self.op.default_iallocator is not None:
3774
      self.cluster.default_iallocator = self.op.default_iallocator
3775

    
3776
    if self.op.reserved_lvs is not None:
3777
      self.cluster.reserved_lvs = self.op.reserved_lvs
3778

    
3779
    if self.op.use_external_mip_script is not None:
3780
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3781

    
3782
    def helper_os(aname, mods, desc):
3783
      desc += " OS list"
3784
      lst = getattr(self.cluster, aname)
3785
      for key, val in mods:
3786
        if key == constants.DDM_ADD:
3787
          if val in lst:
3788
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3789
          else:
3790
            lst.append(val)
3791
        elif key == constants.DDM_REMOVE:
3792
          if val in lst:
3793
            lst.remove(val)
3794
          else:
3795
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3796
        else:
3797
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3798

    
3799
    if self.op.hidden_os:
3800
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3801

    
3802
    if self.op.blacklisted_os:
3803
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3804

    
3805
    if self.op.master_netdev:
3806
      master_params = self.cfg.GetMasterNetworkParameters()
3807
      ems = self.cfg.GetUseExternalMipScript()
3808
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3809
                  self.cluster.master_netdev)
3810
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3811
                                                       master_params, ems)
3812
      result.Raise("Could not disable the master ip")
3813
      feedback_fn("Changing master_netdev from %s to %s" %
3814
                  (master_params.netdev, self.op.master_netdev))
3815
      self.cluster.master_netdev = self.op.master_netdev
3816

    
3817
    if self.op.master_netmask:
3818
      master_params = self.cfg.GetMasterNetworkParameters()
3819
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3820
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3821
                                                        master_params.netmask,
3822
                                                        self.op.master_netmask,
3823
                                                        master_params.ip,
3824
                                                        master_params.netdev)
3825
      if result.fail_msg:
3826
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3827
        feedback_fn(msg)
3828

    
3829
      self.cluster.master_netmask = self.op.master_netmask
3830

    
3831
    self.cfg.Update(self.cluster, feedback_fn)
3832

    
3833
    if self.op.master_netdev:
3834
      master_params = self.cfg.GetMasterNetworkParameters()
3835
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3836
                  self.op.master_netdev)
3837
      ems = self.cfg.GetUseExternalMipScript()
3838
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3839
                                                     master_params, ems)
3840
      if result.fail_msg:
3841
        self.LogWarning("Could not re-enable the master ip on"
3842
                        " the master, please restart manually: %s",
3843
                        result.fail_msg)
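

# Illustrative sketch, not used by any LU, of the layered parameter merging
# performed in CheckPrereq above: a dict of per-hypervisor (or per-OS) dicts
# is copied and then selectively overlaid with overrides, so neither input is
# mutated and unspecified values keep their defaults. This mirrors what the
# objects.FillDict calls combined with the per-key update() loops achieve.
def _ExampleLayerParams(defaults, overrides):
  """Merge C{overrides} on top of C{defaults} without touching either.

  """
  merged = dict((key, dict(val)) for (key, val) in defaults.items())
  for key, val in overrides.items():
    merged.setdefault(key, {}).update(val)
  return merged

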
def _UploadHelper(lu, nodes, fname):
3847
  """Helper for uploading a file and showing warnings.
3848

3849
  """
3850
  if os.path.exists(fname):
3851
    result = lu.rpc.call_upload_file(nodes, fname)
3852
    for to_node, to_result in result.items():
3853
      msg = to_result.fail_msg
3854
      if msg:
3855
        msg = ("Copy of file %s to node %s failed: %s" %
3856
               (fname, to_node, msg))
3857
        lu.proc.LogWarning(msg)
3858

    
3859

    
3860
def _ComputeAncillaryFiles(cluster, redist):
3861
  """Compute files external to Ganeti which need to be consistent.
3862

3863
  @type redist: boolean
3864
  @param redist: Whether to include files which need to be redistributed
3865

3866
  """
3867
  # Compute files for all nodes
3868
  files_all = set([
3869
    constants.SSH_KNOWN_HOSTS_FILE,
3870
    constants.CONFD_HMAC_KEY,
3871
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3872
    constants.SPICE_CERT_FILE,
3873
    constants.SPICE_CACERT_FILE,
3874
    constants.RAPI_USERS_FILE,
3875
    ])
3876

    
3877
  if not redist:
3878
    files_all.update(constants.ALL_CERT_FILES)
3879
    files_all.update(ssconf.SimpleStore().GetFileList())
3880
  else:
3881
    # we need to ship at least the RAPI certificate
3882
    files_all.add(constants.RAPI_CERT_FILE)
3883

    
3884
  if cluster.modify_etc_hosts:
3885
    files_all.add(constants.ETC_HOSTS)
3886

    
3887
  # Files which are optional, these must:
3888
  # - be present in one other category as well
3889
  # - either exist or not exist on all nodes of that category (mc, vm all)
3890
  files_opt = set([
3891
    constants.RAPI_USERS_FILE,
3892
    ])
3893

    
3894
  # Files which should only be on master candidates
3895
  files_mc = set()
3896

    
3897
  if not redist:
3898
    files_mc.add(constants.CLUSTER_CONF_FILE)
3899

    
3900
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3901
    # replication
3902
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3903

    
3904
  # Files which should only be on VM-capable nodes
3905
  files_vm = set(filename
3906
    for hv_name in cluster.enabled_hypervisors
3907
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3908

    
3909
  files_opt |= set(filename
3910
    for hv_name in cluster.enabled_hypervisors
3911
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3912

    
3913
  # Filenames in each category must be unique
3914
  all_files_set = files_all | files_mc | files_vm
3915
  assert (len(all_files_set) ==
3916
          sum(map(len, [files_all, files_mc, files_vm]))), \
3917
         "Found file listed in more than one file list"
3918

    
3919
  # Optional files must be present in one other category
3920
  assert all_files_set.issuperset(files_opt), \
3921
         "Optional file not in a different required list"
3922

    
3923
  return (files_all, files_opt, files_mc, files_vm)
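

# Illustrative sketch, not used by any LU, of the invariants asserted above:
# the three node-scoped categories must be pairwise disjoint, and every
# optional file must also belong to one of them. Plain sets are enough to
# express the check outside the cluster context.
def _ExampleCheckFileCategories(files_all, files_opt, files_mc, files_vm):
  """Return True iff the _ComputeAncillaryFiles invariants hold (sketch).

  """
  union = files_all | files_mc | files_vm
  disjoint = len(union) == (len(files_all) + len(files_mc) + len(files_vm))
  return disjoint and union.issuperset(files_opt)

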
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3927
  """Distribute additional files which are part of the cluster configuration.
3928

3929
  ConfigWriter takes care of distributing the config and ssconf files, but
3930
  there are more files which should be distributed to all nodes. This function
3931
  makes sure those are copied.
3932

3933
  @param lu: calling logical unit
3934
  @param additional_nodes: list of nodes not in the config to distribute to
3935
  @type additional_vm: boolean
3936
  @param additional_vm: whether the additional nodes are vm-capable or not
3937

3938
  """
3939
  # Gather target nodes
3940
  cluster = lu.cfg.GetClusterInfo()
3941
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3942

    
3943
  online_nodes = lu.cfg.GetOnlineNodeList()
3944
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3945

    
3946
  if additional_nodes is not None:
3947
    online_nodes.extend(additional_nodes)
3948
    if additional_vm:
3949
      vm_nodes.extend(additional_nodes)
3950

    
3951
  # Never distribute to master node
3952
  for nodelist in [online_nodes, vm_nodes]:
3953
    if master_info.name in nodelist:
3954
      nodelist.remove(master_info.name)
3955

    
3956
  # Gather file lists
3957
  (files_all, _, files_mc, files_vm) = \
3958
    _ComputeAncillaryFiles(cluster, True)
3959

    
3960
  # Never re-distribute configuration file from here
3961
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3962
              constants.CLUSTER_CONF_FILE in files_vm)
3963
  assert not files_mc, "Master candidates not handled in this function"
3964

    
3965
  filemap = [
3966
    (online_nodes, files_all),
3967
    (vm_nodes, files_vm),
3968
    ]
3969

    
3970
  # Upload the files
3971
  for (node_list, files) in filemap:
3972
    for fname in files:
3973
      _UploadHelper(lu, node_list, fname)
3974

    
3975

    
3976
class LUClusterRedistConf(NoHooksLU):
3977
  """Force the redistribution of cluster configuration.
3978

3979
  This is a very simple LU.
3980

3981
  """
3982
  REQ_BGL = False
3983

    
3984
  def ExpandNames(self):
3985
    self.needed_locks = {
3986
      locking.LEVEL_NODE: locking.ALL_SET,
3987
    }
3988
    self.share_locks[locking.LEVEL_NODE] = 1
3989

    
3990
  def Exec(self, feedback_fn):
3991
    """Redistribute the configuration.
3992

3993
    """
3994
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3995
    _RedistributeAncillaryFiles(self)
3996

    
3997

    
3998
class LUClusterActivateMasterIp(NoHooksLU):
3999
  """Activate the master IP on the master node.
4000

4001
  """
4002
  def Exec(self, feedback_fn):
4003
    """Activate the master IP.
4004

4005
    """
4006
    master_params = self.cfg.GetMasterNetworkParameters()
4007
    ems = self.cfg.GetUseExternalMipScript()
4008
    result = self.rpc.call_node_activate_master_ip(master_params.name,
4009
                                                   master_params, ems)
4010
    result.Raise("Could not activate the master IP")
4011

    
4012

    
4013
class LUClusterDeactivateMasterIp(NoHooksLU):
4014
  """Deactivate the master IP on the master node.
4015

4016
  """
4017
  def Exec(self, feedback_fn):
4018
    """Deactivate the master IP.
4019

4020
    """
4021
    master_params = self.cfg.GetMasterNetworkParameters()
4022
    ems = self.cfg.GetUseExternalMipScript()
4023
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4024
                                                     master_params, ems)
4025
    result.Raise("Could not deactivate the master IP")
4026

    
4027

    
4028
def _WaitForSync(lu, instance, disks=None, oneshot=False):
4029
  """Sleep and poll for an instance's disk to sync.
4030

4031
  """
4032
  if not instance.disks or disks is not None and not disks:
4033
    return True
4034

    
4035
  disks = _ExpandCheckDisks(instance, disks)
4036

    
4037
  if not oneshot:
4038
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4039

    
4040
  node = instance.primary_node
4041

    
4042
  for dev in disks:
4043
    lu.cfg.SetDiskID(dev, node)
4044

    
4045
  # TODO: Convert to utils.Retry
4046

    
4047
  retries = 0
4048
  degr_retries = 10 # in seconds, as we sleep 1 second each time
4049
  while True:
4050
    max_time = 0
4051
    done = True
4052
    cumul_degraded = False
4053
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4054
    msg = rstats.fail_msg
4055
    if msg:
4056
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4057
      retries += 1
4058
      if retries >= 10:
4059
        raise errors.RemoteError("Can't contact node %s for mirror data,"
4060
                                 " aborting." % node)
4061
      time.sleep(6)
4062
      continue
4063
    rstats = rstats.payload
4064
    retries = 0
4065
    for i, mstat in enumerate(rstats):
4066
      if mstat is None:
4067
        lu.LogWarning("Can't compute data for node %s/%s",
4068
                           node, disks[i].iv_name)
4069
        continue
4070

    
4071
      cumul_degraded = (cumul_degraded or
4072
                        (mstat.is_degraded and mstat.sync_percent is None))
4073
      if mstat.sync_percent is not None:
4074
        done = False
4075
        if mstat.estimated_time is not None:
4076
          rem_time = ("%s remaining (estimated)" %
4077
                      utils.FormatSeconds(mstat.estimated_time))
4078
          max_time = mstat.estimated_time
4079
        else:
4080
          rem_time = "no time estimate"
4081
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4082
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4083

    
4084
    # if we're done but degraded, let's do a few small retries, to
4085
    # make sure we see a stable and not transient situation; therefore
4086
    # we force restart of the loop
4087
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4088
      logging.info("Degraded disks found, %d retries left", degr_retries)
4089
      degr_retries -= 1
4090
      time.sleep(1)
4091
      continue
4092

    
4093
    if done or oneshot:
4094
      break
4095

    
4096
    time.sleep(min(60, max_time))
4097

    
4098
  if done:
4099
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4100
  return not cumul_degraded
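

# Illustrative sketch, not used by any LU, of the polling structure above,
# stripped of the RPC details: poll a status callback until it reports "done",
# but when the result is done-yet-degraded spend a small budget of one-second
# grace retries so a transient degraded state does not cause a false alarm.
# The check_fn callback and its (done, degraded) return value are assumptions
# made purely for this example.
def _ExamplePollUntilClean(check_fn, grace_retries=10, poll_interval=1):
  """Poll C{check_fn} until done; return False if it stays degraded.

  """
  while True:
    (done, degraded) = check_fn()
    if done:
      if degraded and grace_retries > 0:
        # give a transiently degraded device a few more chances
        grace_retries -= 1
        time.sleep(poll_interval)
        continue
      return not degraded
    time.sleep(poll_interval)

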
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4104
  """Check that mirrors are not degraded.
4105

4106
  The ldisk parameter, if True, will change the test from the
4107
  is_degraded attribute (which represents overall non-ok status for
4108
  the device(s)) to the ldisk (representing the local storage status).
4109

4110
  """
4111
  lu.cfg.SetDiskID(dev, node)
4112

    
4113
  result = True
4114

    
4115
  if on_primary or dev.AssembleOnSecondary():
4116
    rstats = lu.rpc.call_blockdev_find(node, dev)
4117
    msg = rstats.fail_msg
4118
    if msg:
4119
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4120
      result = False
4121
    elif not rstats.payload:
4122
      lu.LogWarning("Can't find disk on node %s", node)
4123
      result = False
4124
    else:
4125
      if ldisk:
4126
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4127
      else:
4128
        result = result and not rstats.payload.is_degraded
4129

    
4130
  if dev.children:
4131
    for child in dev.children:
4132
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4133

    
4134
  return result
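

# Illustrative sketch, not used by any LU, of the recursive aggregation that
# _CheckDiskConsistency performs: a device only counts as healthy if the
# device itself and every one of its children pass the check. The callback
# below stands in for the per-device RPC status lookup and is an assumption
# made for the example; the real function walks ganeti.objects.Disk trees.
def _ExampleTreeHealthy(is_healthy_fn, dev):
  """Return True iff C{dev} and all of its children satisfy C{is_healthy_fn}.

  """
  result = is_healthy_fn(dev)
  for child in getattr(dev, "children", None) or []:
    result = result and _ExampleTreeHealthy(is_healthy_fn, child)
  return result

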
class LUOobCommand(NoHooksLU):
4138
  """Logical unit for OOB handling.
4139

4140
  """
4141
  REQ_BGL = False
4142
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4143

    
4144
  def ExpandNames(self):
4145
    """Gather locks we need.
4146

4147
    """
4148
    if self.op.node_names:
4149
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4150
      lock_names = self.op.node_names
4151
    else:
4152
      lock_names = locking.ALL_SET
4153

    
4154
    self.needed_locks = {
4155
      locking.LEVEL_NODE: lock_names,
4156
      }
4157

    
4158
  def CheckPrereq(self):
4159
    """Check prerequisites.
4160

4161
    This checks:
4162
     - the node exists in the configuration
4163
     - OOB is supported
4164

4165
    Any errors are signaled by raising errors.OpPrereqError.
4166

4167
    """
4168
    self.nodes = []
4169
    self.master_node = self.cfg.GetMasterNode()
4170

    
4171
    assert self.op.power_delay >= 0.0
4172

    
4173
    if self.op.node_names:
4174
      if (self.op.command in self._SKIP_MASTER and
4175
          self.master_node in self.op.node_names):
4176
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4177
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4178

    
4179
        if master_oob_handler:
4180
          additional_text = ("run '%s %s %s' if you want to operate on the"
4181
                             " master regardless") % (master_oob_handler,
4182
                                                      self.op.command,
4183
                                                      self.master_node)
4184
        else:
4185
          additional_text = "it does not support out-of-band operations"
4186

    
4187
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4188
                                    " allowed for %s; %s") %
4189
                                   (self.master_node, self.op.command,
4190
                                    additional_text), errors.ECODE_INVAL)
4191
    else:
4192
      self.op.node_names = self.cfg.GetNodeList()
4193
      if self.op.command in self._SKIP_MASTER:
4194
        self.op.node_names.remove(self.master_node)
4195

    
4196
    if self.op.command in self._SKIP_MASTER:
4197
      assert self.master_node not in self.op.node_names
4198

    
4199
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4200
      if node is None:
4201
        raise errors.OpPrereqError("Node %s not found" % node_name,
4202
                                   errors.ECODE_NOENT)
4203
      else:
4204
        self.nodes.append(node)
4205

    
4206
      if (not self.op.ignore_status and
4207
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4208
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4209
                                    " not marked offline") % node_name,
4210
                                   errors.ECODE_STATE)
4211

    
4212
  def Exec(self, feedback_fn):
4213
    """Execute OOB and return result if we expect any.
4214

4215
    """
4216
    master_node = self.master_node
4217
    ret = []
4218

    
4219
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4220
                                              key=lambda node: node.name)):
4221
      node_entry = [(constants.RS_NORMAL, node.name)]
4222
      ret.append(node_entry)
4223

    
4224
      oob_program = _SupportsOob(self.cfg, node)
4225

    
4226
      if not oob_program:
4227
        node_entry.append((constants.RS_UNAVAIL, None))
4228
        continue
4229

    
4230
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4231
                   self.op.command, oob_program, node.name)
4232
      result = self.rpc.call_run_oob(master_node, oob_program,
4233
                                     self.op.command, node.name,
4234
                                     self.op.timeout)
4235

    
4236
      if result.fail_msg:
4237
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4238
                        node.name, result.fail_msg)
4239
        node_entry.append((constants.RS_NODATA, None))
4240
      else:
4241
        try:
4242
          self._CheckPayload(result)
4243
        except errors.OpExecError, err:
4244
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4245
                          node.name, err)
4246
          node_entry.append((constants.RS_NODATA, None))
4247
        else:
4248
          if self.op.command == constants.OOB_HEALTH:
4249
            # For health we should log important events
4250
            for item, status in result.payload:
4251
              if status in [constants.OOB_STATUS_WARNING,
4252
                            constants.OOB_STATUS_CRITICAL]:
4253
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4254
                                item, node.name, status)
4255

    
4256
          if self.op.command == constants.OOB_POWER_ON:
4257
            node.powered = True
4258
          elif self.op.command == constants.OOB_POWER_OFF:
4259
            node.powered = False
4260
          elif self.op.command == constants.OOB_POWER_STATUS:
4261
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4262
            if powered != node.powered:
4263
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4264
                               " match actual power state (%s)"), node.powered,
4265
                              node.name, powered)
4266

    
4267
          # For configuration changing commands we should update the node
4268
          if self.op.command in (constants.OOB_POWER_ON,
4269
                                 constants.OOB_POWER_OFF):
4270
            self.cfg.Update(node, feedback_fn)
4271

    
4272
          node_entry.append((constants.RS_NORMAL, result.payload))
4273

    
4274
          if (self.op.command == constants.OOB_POWER_ON and
4275
              idx < len(self.nodes) - 1):
4276
            time.sleep(self.op.power_delay)
4277

    
4278
    return ret
4279

    
4280
  def _CheckPayload(self, result):
4281
    """Checks if the payload is valid.
4282

4283
    @param result: RPC result
4284
    @raises errors.OpExecError: If payload is not valid
4285

4286
    """
4287
    errs = []
4288
    if self.op.command == constants.OOB_HEALTH:
4289
      if not isinstance(result.payload, list):
4290
        errs.append("command 'health' is expected to return a list but got %s" %
4291
                    type(result.payload))
4292
      else:
4293
        for item, status in result.payload:
4294
          if status not in constants.OOB_STATUSES:
4295
            errs.append("health item '%s' has invalid status '%s'" %
4296
                        (item, status))
4297

    
4298
    if self.op.command == constants.OOB_POWER_STATUS:
4299
      if not isinstance(result.payload, dict):
4300
        errs.append("power-status is expected to return a dict but got %s" %
4301
                    type(result.payload))
4302

    
4303
    if self.op.command in [
4304
        constants.OOB_POWER_ON,
4305
        constants.OOB_POWER_OFF,
4306
        constants.OOB_POWER_CYCLE,
4307
        ]:
4308
      if result.payload is not None:
4309
        errs.append("%s is expected to not return payload but got '%s'" %
4310
                    (self.op.command, result.payload))
4311

    
4312
    if errs:
4313
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4314
                               utils.CommaJoin(errs))
4315

    
4316

    
4317
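# The _QueryBase subclasses in this module (_OsQuery below, _NodeQuery,
# _InstanceQuery) are wired to their logical units following the same pattern:
# the LU's CheckArguments builds the query object with a filter and a field
# list, ExpandNames/DeclareLocks delegate to it, and Exec runs OldStyleQuery
# or NewStyleQuery on it.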
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


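# Legacy behaviour implemented by LUOsDiagnose._BuildFilter below: hidden,
# blacklisted and invalid OSes are filtered out of the result unless the
# caller explicitly requests the corresponding field (e.g. asking for
# "hidden" disables that part of the status filter).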
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


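# LUNodeRemove.Exec below performs the removal in a fixed order: the candidate
# pool is adjusted and the node dropped from the configuration/context first,
# the post hooks are run while the node is still known, and only then is the
# node daemon asked to leave the cluster; a failure of that final RPC only
# produces a warning.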
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


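# _NodeQuery below only takes node locks when live data (query.NQ_LIVE) is
# requested together with use_locking; purely static fields are served from
# the configuration without locking.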
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


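# Illustrative row produced by LUNodeQueryvols.Exec below for
# output_fields=["node", "phys", "vg", "name", "size", "instance"]
# (node, volume and instance names are made up):
#   ["node1.example.com", "/dev/xenvg/disk0.data", "xenvg", "disk0.data",
#    "10240", "instance1.example.com"]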
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


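# LUNodeQueryStorage below always fetches the storage unit name so the results
# can be keyed and sorted by it, while the "node" and "type" columns are
# synthesized by the LU itself instead of being requested from the backend.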
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


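# _InstanceQuery below locks node groups optimistically (based on
# configuration data read before the group locks are actually held) and
# re-verifies the instance/group mapping in _CheckGroupLocks before using it.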
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


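# LUQuery and LUQueryFields below dispatch on self.op.what via
# _GetQueryImplementation, which is expected to return one of the _QueryBase
# subclasses defined in this module (e.g. _OsQuery, _NodeQuery,
# _InstanceQuery).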
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage unit on the given node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


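# For re-adds, LUNodeAdd below keeps the node's previous flags and IP
# configuration (the secondary IP must match the old one) and resets the
# offline/drained flags so that the RPC calls needed during the procedure can
# go through.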
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


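# Role model used by LUNodeSetParams below: the (master_candidate, drained,
# offline) flag tuple maps to exactly one role, e.g. (True, False, False) is
# _ROLE_CANDIDATE and (False, False, False) is _ROLE_REGULAR; _R2F inverts the
# mapping so the new flag tuple can be computed from the new role.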
class LUNodeSetParams(LogicalUnit):
5312
  """Modifies the parameters of a node.
5313

5314
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5315
      to the node role (as _ROLE_*)
5316
  @cvar _R2F: a dictionary from node role to tuples of flags
5317
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5318

5319
  """
5320
  HPATH = "node-modify"
5321
  HTYPE = constants.HTYPE_NODE
5322
  REQ_BGL = False
5323
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5324
  _F2R = {
5325
    (True, False, False): _ROLE_CANDIDATE,
5326
    (False, True, False): _ROLE_DRAINED,
5327
    (False, False, True): _ROLE_OFFLINE,
5328
    (False, False, False): _ROLE_REGULAR,
5329
    }
5330
  _R2F = dict((v, k) for k, v in _F2R.items())
5331
  _FLAGS = ["master_candidate", "drained", "offline"]
5332

    
5333
  def CheckArguments(self):
5334
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5335
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5336
                self.op.master_capable, self.op.vm_capable,
5337
                self.op.secondary_ip, self.op.ndparams]
5338
    if all_mods.count(None) == len(all_mods):
5339
      raise errors.OpPrereqError("Please pass at least one modification",
5340
                                 errors.ECODE_INVAL)
5341
    if all_mods.count(True) > 1:
5342
      raise errors.OpPrereqError("Can't set the node into more than one"
5343
                                 " state at the same time",
5344
                                 errors.ECODE_INVAL)
5345

    
5346
    # Boolean value that tells us whether we might be demoting from MC
5347
    self.might_demote = (self.op.master_candidate == False or
5348
                         self.op.offline == True or
5349
                         self.op.drained == True or
5350
                         self.op.master_capable == False)
5351

    
5352
    if self.op.secondary_ip:
5353
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5354
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5355
                                   " address" % self.op.secondary_ip,
5356
                                   errors.ECODE_INVAL)
5357

    
5358
    self.lock_all = self.op.auto_promote and self.might_demote
5359
    self.lock_instances = self.op.secondary_ip is not None
5360

    
5361
  def _InstanceFilter(self, instance):
5362
    """Filter for getting affected instances.
5363

5364
    """
5365
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5366
            self.op.node_name in instance.all_nodes)
5367

    
5368
  def ExpandNames(self):
5369
    if self.lock_all:
5370
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5371
    else:
5372
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5373

    
5374
    # Since modifying a node can have severe effects on currently running
5375
    # operations the resource lock is at least acquired in shared mode
5376
    self.needed_locks[locking.LEVEL_NODE_RES] = \
5377
      self.needed_locks[locking.LEVEL_NODE]
5378

    
5379
    # Get node resource and instance locks in shared mode; they are not used
5380
    # for anything but read-only access
5381
    self.share_locks[locking.LEVEL_NODE_RES] = 1
5382
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5383

    
5384
    if self.lock_instances:
5385
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5386
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5387

    
5388
  def BuildHooksEnv(self):
5389
    """Build hooks env.
5390

5391
    This runs on the master node.
5392

5393
    """
5394
    return {
5395
      "OP_TARGET": self.op.node_name,
5396
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5397
      "OFFLINE": str(self.op.offline),
5398
      "DRAINED": str(self.op.drained),
5399
      "MASTER_CAPABLE": str(self.op.master_capable),
5400
      "VM_CAPABLE": str(self.op.vm_capable),
5401
      }
5402

    
5403
  def BuildHooksNodes(self):
5404
    """Build hooks nodes.
5405

5406
    """
5407
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5408
    return (nl, nl)
5409

    
5410
  def CheckPrereq(self):
5411
    """Check prerequisites.
5412

5413
    This only checks the instance list against the existing names.
5414

5415
    """
5416
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5417

    
5418
    if self.lock_instances:
5419
      affected_instances = \
5420
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5421

    
5422
      # Verify instance locks
5423
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5424
      wanted_instances = frozenset(affected_instances.keys())
5425
      if wanted_instances - owned_instances:
5426
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5427
                                   " secondary IP address have changed since"
5428
                                   " locks were acquired, wanted '%s', have"
5429
                                   " '%s'; retry the operation" %
5430
                                   (self.op.node_name,
5431
                                    utils.CommaJoin(wanted_instances),
5432
                                    utils.CommaJoin(owned_instances)),
5433
                                   errors.ECODE_STATE)
5434
    else:
5435
      affected_instances = None
5436

    
5437
    if (self.op.master_candidate is not None or
5438
        self.op.drained is not None or
5439
        self.op.offline is not None):
5440
      # we can't change the master's node flags
5441
      if self.op.node_name == self.cfg.GetMasterNode():
5442
        raise errors.OpPrereqError("The master role can be changed"
5443
                                   " only via master-failover",
5444
                                   errors.ECODE_INVAL)
5445

    
5446
    if self.op.master_candidate and not node.master_capable:
5447
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5448
                                 " it a master candidate" % node.name,
5449
                                 errors.ECODE_STATE)
5450

    
5451
    if self.op.vm_capable == False:
5452
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5453
      if ipri or isec:
5454
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5455
                                   " the vm_capable flag" % node.name,
5456
                                   errors.ECODE_STATE)
5457

    
5458
    if node.master_candidate and self.might_demote and not self.lock_all:
5459
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5460
      # check if after removing the current node, we're missing master
5461
      # candidates
5462
      (mc_remaining, mc_should, _) = \
5463
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5464
      if mc_remaining < mc_should:
5465
        raise errors.OpPrereqError("Not enough master candidates, please"
5466
                                   " pass auto promote option to allow"
5467
                                   " promotion", errors.ECODE_STATE)
5468

    
5469
    self.old_flags = old_flags = (node.master_candidate,
5470
                                  node.drained, node.offline)
5471
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5472
    self.old_role = old_role = self._F2R[old_flags]
5473

    
5474
    # Check for ineffective changes
5475
    for attr in self._FLAGS:
5476
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5477
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5478
        setattr(self.op, attr, None)
5479

    
5480
    # Past this point, any flag change to False means a transition
5481
    # away from the respective state, as only real changes are kept
5482

    
5483
    # TODO: We might query the real power state if it supports OOB
5484
    if _SupportsOob(self.cfg, node):
5485
      if self.op.offline is False and not (node.powered or
5486
                                           self.op.powered == True):
5487
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5488
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


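# Illustrative sketch (not part of the Ganeti API): how the mutually
# exclusive node flags handled above map onto a single role.  The role
# names and the helper below are hypothetical stand-ins for the _ROLE_*
# constants; they only document the precedence used in CheckPrereq.
def _example_compute_node_role(master_candidate, drained, offline, old_role):
  """Return the role implied by three tri-state flags (True/False/None).

  At most one flag may be True; a False value only un-sets the current
  role, and all-None keeps the old role unchanged.

  """
  assert [master_candidate, drained, offline].count(True) <= 1
  if master_candidate:
    return "candidate"
  elif drained:
    return "drained"
  elif offline:
    return "offline"
  elif False in (master_candidate, drained, offline):
    return "regular"
  else:
    return old_role

# Example: _example_compute_node_role(None, None, False, "offline") == "regular"

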
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


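# Illustrative sketch (hypothetical helper, not used by this module): the
# nested-dict filtering performed above when building "os_hvp", expressed
# as a standalone function over plain dictionaries.
def _example_filter_os_hvp(os_hvp, enabled_hypervisors):
  """Keep only the per-OS hypervisor overrides of enabled hypervisors."""
  return dict((os_name,
               dict((hv_name, hv_params)
                    for (hv_name, hv_params) in hv_dict.items()
                    if hv_name in enabled_hypervisors))
              for (os_name, hv_dict) in os_hvp.items())

# Example:
#   _example_filter_os_hvp({"debian": {"kvm": {}, "fake": {}}}, ["kvm"])
#   == {"debian": {"kvm": {}}}

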
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


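# Illustrative sketch (standalone, hypothetical names): the two-pass
# activation order used by _AssembleInstanceDisks above, reduced to plain
# data.  "assemble_fn(node, disk, as_primary)" stands in for the
# call_blockdev_assemble RPC and is assumed to return (ok, payload).
def _example_two_pass_assemble(disks, primary_node, all_nodes, assemble_fn,
                               ignore_secondaries=False):
  """Assemble every disk on all nodes as secondary, then on the primary."""
  disks_ok = True
  device_info = []
  # 1st pass: secondary mode everywhere, to narrow the window in which a
  # device could be promoted to primary before its peers are up
  for disk in disks:
    for node in all_nodes:
      ok, _ = assemble_fn(node, disk, False)
      if not ok and not ignore_secondaries:
        disks_ok = False
  # 2nd pass: primary mode, only on the primary node
  for disk in disks:
    ok, dev_path = assemble_fn(primary_node, disk, True)
    if not ok:
      disks_ok = False
      dev_path = None
    device_info.append((primary_node, disk, dev_path))
  return disks_ok, device_info

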
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
5917
  """Shutdown an instance's disks.
5918

5919
  """
5920
  REQ_BGL = False
5921

    
5922
  def ExpandNames(self):
5923
    self._ExpandAndLockInstance()
5924
    self.needed_locks[locking.LEVEL_NODE] = []
5925
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5926

    
5927
  def DeclareLocks(self, level):
5928
    if level == locking.LEVEL_NODE:
5929
      self._LockInstancesNodes()
5930

    
5931
  def CheckPrereq(self):
5932
    """Check prerequisites.
5933

5934
    This checks that the instance is in the cluster.
5935

5936
    """
5937
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5938
    assert self.instance is not None, \
5939
      "Cannot retrieve locked instance %s" % self.op.instance_name
5940

    
5941
  def Exec(self, feedback_fn):
5942
    """Deactivate the disks
5943

5944
    """
5945
    instance = self.instance
5946
    if self.op.force:
5947
      _ShutdownInstanceDisks(self, instance)
5948
    else:
5949
      _SafeShutdownInstanceDisks(self, instance)
5950

    
5951

    
5952
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5953
  """Shutdown block devices of an instance.
5954

5955
  This function checks if an instance is running, before calling
5956
  _ShutdownInstanceDisks.
5957

5958
  """
5959
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5960
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5961

    
5962

    
5963
def _ExpandCheckDisks(instance, disks):
5964
  """Return the instance disks selected by the disks list
5965

5966
  @type disks: list of L{objects.Disk} or None
5967
  @param disks: selected disks
5968
  @rtype: list of L{objects.Disk}
5969
  @return: selected instance disks to act on
5970

5971
  """
5972
  if disks is None:
5973
    return instance.disks
5974
  else:
5975
    if not set(disks).issubset(instance.disks):
5976
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5977
                                   " target instance")
5978
    return disks
5979

    
5980

    
5981
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5982
  """Shutdown block devices of an instance.
5983

5984
  This does the shutdown on all nodes of the instance.
5985

5986
  Errors on the primary node are ignored only if C{ignore_primary} is
  true.
5988

5989
  """
5990
  all_result = True
5991
  disks = _ExpandCheckDisks(instance, disks)
5992

    
5993
  for disk in disks:
5994
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5995
      lu.cfg.SetDiskID(top_disk, node)
5996
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5997
      msg = result.fail_msg
5998
      if msg:
5999
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6000
                      disk.iv_name, node, msg)
6001
        if ((node == instance.primary_node and not ignore_primary) or
6002
            (node != instance.primary_node and not result.offline)):
6003
          all_result = False
6004
  return all_result
6005

    
6006

    
6007
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6008
  """Checks if a node has enough free memory.
6009

6010
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6014

6015
  @type lu: C{LogicalUnit}
6016
  @param lu: a logical unit from which we get configuration data
6017
  @type node: C{str}
6018
  @param node: the node to check
6019
  @type reason: C{str}
6020
  @param reason: string to use in the error message
6021
  @type requested: C{int}
6022
  @param requested: the amount of memory in MiB to check for
6023
  @type hypervisor_name: C{str}
6024
  @param hypervisor_name: the hypervisor to ask for memory stats
6025
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6026
      we cannot check the node
6027

6028
  """
6029
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6030
  nodeinfo[node].Raise("Can't get data from node %s" % node,
6031
                       prereq=True, ecode=errors.ECODE_ENVIRON)
6032
  (_, _, (hv_info, )) = nodeinfo[node].payload
6033

    
6034
  free_mem = hv_info.get("memory_free", None)
6035
  if not isinstance(free_mem, int):
6036
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6037
                               " was '%s'" % (node, free_mem),
6038
                               errors.ECODE_ENVIRON)
6039
  if requested > free_mem:
6040
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6041
                               " needed %s MiB, available %s MiB" %
6042
                               (node, reason, requested, free_mem),
6043
                               errors.ECODE_NORES)
6044
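# Illustrative sketch (hypothetical helper): the free-memory admission
# check performed by _CheckNodeFreeMemory above, using a plain dict of
# per-node statistics instead of the node RPC.
def _example_check_free_memory(node_stats, node, requested_mib):
  """Raise ValueError if the node cannot provide requested_mib of RAM."""
  free_mem = node_stats.get(node, {}).get("memory_free", None)
  if not isinstance(free_mem, int):
    raise ValueError("Can't compute free memory on node %s, result was %r" %
                     (node, free_mem))
  if requested_mib > free_mem:
    raise ValueError("Not enough memory on node %s: needed %s MiB,"
                     " available %s MiB" % (node, requested_mib, free_mem))

# Example: _example_check_free_memory({"node1": {"memory_free": 2048}},
#                                      "node1", 1024) passes silently.
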

    
6045

    
6046
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.
6048

6049
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6053

6054
  @type lu: C{LogicalUnit}
6055
  @param lu: a logical unit from which we get configuration data
6056
  @type nodenames: C{list}
6057
  @param nodenames: the list of node names to check
6058
  @type req_sizes: C{dict}
6059
  @param req_sizes: the hash of vg and corresponding amount of disk in
6060
      MiB to check for
6061
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6062
      or we cannot check the node
6063

6064
  """
6065
  for vg, req_size in req_sizes.items():
6066
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6067

    
6068

    
6069
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6070
  """Checks if nodes have enough free disk space in the specified VG.
6071

6072
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6076

6077
  @type lu: C{LogicalUnit}
6078
  @param lu: a logical unit from which we get configuration data
6079
  @type nodenames: C{list}
6080
  @param nodenames: the list of node names to check
6081
  @type vg: C{str}
6082
  @param vg: the volume group to check
6083
  @type requested: C{int}
6084
  @param requested: the amount of disk in MiB to check for
6085
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6086
      or we cannot check the node
6087

6088
  """
6089
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6090
  for node in nodenames:
6091
    info = nodeinfo[node]
6092
    info.Raise("Cannot get current information from node %s" % node,
6093
               prereq=True, ecode=errors.ECODE_ENVIRON)
6094
    (_, (vg_info, ), _) = info.payload
6095
    vg_free = vg_info.get("vg_free", None)
6096
    if not isinstance(vg_free, int):
6097
      raise errors.OpPrereqError("Can't compute free disk space on node"
6098
                                 " %s for vg %s, result was '%s'" %
6099
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6100
    if requested > vg_free:
6101
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6102
                                 " vg %s: required %d MiB, available %d MiB" %
6103
                                 (node, vg, requested, vg_free),
6104
                                 errors.ECODE_NORES)
6105
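# Illustrative sketch (hypothetical helper): how the per-VG requirements
# dictionary handled by _CheckNodesFreeDiskPerVG/_CheckNodesFreeDiskOnVG
# above translates into individual checks, using plain dictionaries.
def _example_check_free_disk(vg_stats, nodes, req_sizes):
  """Raise ValueError unless every node has req_sizes[vg] MiB free in vg."""
  for vg, requested in req_sizes.items():
    for node in nodes:
      vg_free = vg_stats.get(node, {}).get(vg, None)
      if not isinstance(vg_free, int):
        raise ValueError("Can't compute free space on node %s for vg %s" %
                         (node, vg))
      if requested > vg_free:
        raise ValueError("Not enough space on node %s, vg %s: required %d MiB,"
                         " available %d MiB" % (node, vg, requested, vg_free))

# Example: _example_check_free_disk({"node1": {"xenvg": 10240}}, ["node1"],
#                                    {"xenvg": 2048}) passes silently.
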

    
6106

    
6107
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6108
  """Checks if nodes have enough physical CPUs
6109

6110
  This function checks if all given nodes have the needed number of
6111
  physical CPUs. In case any node has less CPUs or we cannot get the
6112
  information from the node, this function raises an OpPrereqError
6113
  exception.
6114

6115
  @type lu: C{LogicalUnit}
6116
  @param lu: a logical unit from which we get configuration data
6117
  @type nodenames: C{list}
6118
  @param nodenames: the list of node names to check
6119
  @type requested: C{int}
6120
  @param requested: the minimum acceptable number of physical CPUs
6121
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6122
      or we cannot check the node
6123

6124
  """
6125
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6126
  for node in nodenames:
6127
    info = nodeinfo[node]
6128
    info.Raise("Cannot get current information from node %s" % node,
6129
               prereq=True, ecode=errors.ECODE_ENVIRON)
6130
    (_, _, (hv_info, )) = info.payload
6131
    num_cpus = hv_info.get("cpu_total", None)
6132
    if not isinstance(num_cpus, int):
6133
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6134
                                 " on node %s, result was '%s'" %
6135
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6136
    if requested > num_cpus:
6137
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6138
                                 "required" % (node, num_cpus, requested),
6139
                                 errors.ECODE_NORES)
6140

    
6141

    
6142
class LUInstanceStartup(LogicalUnit):
6143
  """Starts an instance.
6144

6145
  """
6146
  HPATH = "instance-start"
6147
  HTYPE = constants.HTYPE_INSTANCE
6148
  REQ_BGL = False
6149

    
6150
  def CheckArguments(self):
6151
    # extra beparams
6152
    if self.op.beparams:
6153
      # fill the beparams dict
6154
      objects.UpgradeBeParams(self.op.beparams)
6155
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6156

    
6157
  def ExpandNames(self):
6158
    self._ExpandAndLockInstance()
6159

    
6160
  def BuildHooksEnv(self):
6161
    """Build hooks env.
6162

6163
    This runs on master, primary and secondary nodes of the instance.
6164

6165
    """
6166
    env = {
6167
      "FORCE": self.op.force,
6168
      }
6169

    
6170
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6171

    
6172
    return env
6173

    
6174
  def BuildHooksNodes(self):
6175
    """Build hooks nodes.
6176

6177
    """
6178
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6179
    return (nl, nl)
6180

    
6181
  def CheckPrereq(self):
6182
    """Check prerequisites.
6183

6184
    This checks that the instance is in the cluster.
6185

6186
    """
6187
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6188
    assert self.instance is not None, \
6189
      "Cannot retrieve locked instance %s" % self.op.instance_name
6190

    
6191
    # extra hvparams
6192
    if self.op.hvparams:
6193
      # check hypervisor parameter syntax (locally)
6194
      cluster = self.cfg.GetClusterInfo()
6195
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6196
      filled_hvp = cluster.FillHV(instance)
6197
      filled_hvp.update(self.op.hvparams)
6198
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6199
      hv_type.CheckParameterSyntax(filled_hvp)
6200
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6201

    
6202
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6203

    
6204
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6205

    
6206
    if self.primary_offline and self.op.ignore_offline_nodes:
6207
      self.proc.LogWarning("Ignoring offline primary node")
6208

    
6209
      if self.op.hvparams or self.op.beparams:
6210
        self.proc.LogWarning("Overridden parameters are ignored")
6211
    else:
6212
      _CheckNodeOnline(self, instance.primary_node)
6213

    
6214
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6215

    
6216
      # check bridges existence
6217
      _CheckInstanceBridgesExist(self, instance)
6218

    
6219
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6220
                                                instance.name,
6221
                                                instance.hypervisor)
6222
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6223
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6224
      if not remote_info.payload: # not running already
6225
        _CheckNodeFreeMemory(self, instance.primary_node,
6226
                             "starting instance %s" % instance.name,
6227
                             bep[constants.BE_MAXMEM], instance.hypervisor)
6228

    
6229
  def Exec(self, feedback_fn):
6230
    """Start the instance.
6231

6232
    """
6233
    instance = self.instance
6234
    force = self.op.force
6235

    
6236
    if not self.op.no_remember:
6237
      self.cfg.MarkInstanceUp(instance.name)
6238

    
6239
    if self.primary_offline:
6240
      assert self.op.ignore_offline_nodes
6241
      self.proc.LogInfo("Primary node offline, marked instance as started")
6242
    else:
6243
      node_current = instance.primary_node
6244

    
6245
      _StartInstanceDisks(self, instance, force)
6246

    
6247
      result = \
6248
        self.rpc.call_instance_start(node_current,
6249
                                     (instance, self.op.hvparams,
6250
                                      self.op.beparams),
6251
                                     self.op.startup_paused)
6252
      msg = result.fail_msg
6253
      if msg:
6254
        _ShutdownInstanceDisks(self, instance)
6255
        raise errors.OpExecError("Could not start instance: %s" % msg)
6256
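# Illustrative sketch (standalone, hypothetical callables): the startup
# flow of LUInstanceStartup.Exec above.  The mark_up/assemble_disks/
# start/shutdown_disks arguments stand in for the configuration update
# and the node RPCs; error handling is simplified to an exception.
def _example_start_instance(mark_up, assemble_disks, start, shutdown_disks):
  """Mark the instance up, bring up its disks, then start it.

  If starting fails, the disks are shut down again and the error is
  re-raised, mirroring the cleanup done by the LU.

  """
  mark_up()
  assemble_disks()
  try:
    start()
  except Exception:
    shutdown_disks()
    raise
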

    
6257

    
6258
class LUInstanceReboot(LogicalUnit):
6259
  """Reboot an instance.
6260

6261
  """
6262
  HPATH = "instance-reboot"
6263
  HTYPE = constants.HTYPE_INSTANCE
6264
  REQ_BGL = False
6265

    
6266
  def ExpandNames(self):
6267
    self._ExpandAndLockInstance()
6268

    
6269
  def BuildHooksEnv(self):
6270
    """Build hooks env.
6271

6272
    This runs on master, primary and secondary nodes of the instance.
6273

6274
    """
6275
    env = {
6276
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6277
      "REBOOT_TYPE": self.op.reboot_type,
6278
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6279
      }
6280

    
6281
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6282

    
6283
    return env
6284

    
6285
  def BuildHooksNodes(self):
6286
    """Build hooks nodes.
6287

6288
    """
6289
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6290
    return (nl, nl)
6291

    
6292
  def CheckPrereq(self):
6293
    """Check prerequisites.
6294

6295
    This checks that the instance is in the cluster.
6296

6297
    """
6298
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6299
    assert self.instance is not None, \
6300
      "Cannot retrieve locked instance %s" % self.op.instance_name
6301
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6302
    _CheckNodeOnline(self, instance.primary_node)
6303

    
6304
    # check bridges existence
6305
    _CheckInstanceBridgesExist(self, instance)
6306

    
6307
  def Exec(self, feedback_fn):
6308
    """Reboot the instance.
6309

6310
    """
6311
    instance = self.instance
6312
    ignore_secondaries = self.op.ignore_secondaries
6313
    reboot_type = self.op.reboot_type
6314

    
6315
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6316
                                              instance.name,
6317
                                              instance.hypervisor)
6318
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6319
    instance_running = bool(remote_info.payload)
6320

    
6321
    node_current = instance.primary_node
6322

    
6323
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6324
                                            constants.INSTANCE_REBOOT_HARD]:
6325
      for disk in instance.disks:
6326
        self.cfg.SetDiskID(disk, node_current)
6327
      result = self.rpc.call_instance_reboot(node_current, instance,
6328
                                             reboot_type,
6329
                                             self.op.shutdown_timeout)
6330
      result.Raise("Could not reboot instance")
6331
    else:
6332
      if instance_running:
6333
        result = self.rpc.call_instance_shutdown(node_current, instance,
6334
                                                 self.op.shutdown_timeout)
6335
        result.Raise("Could not shutdown instance for full reboot")
6336
        _ShutdownInstanceDisks(self, instance)
6337
      else:
6338
        self.LogInfo("Instance %s was already stopped, starting now",
6339
                     instance.name)
6340
      _StartInstanceDisks(self, instance, ignore_secondaries)
6341
      result = self.rpc.call_instance_start(node_current,
6342
                                            (instance, None, None), False)
6343
      msg = result.fail_msg
6344
      if msg:
6345
        _ShutdownInstanceDisks(self, instance)
6346
        raise errors.OpExecError("Could not start instance for"
6347
                                 " full reboot: %s" % msg)
6348

    
6349
    self.cfg.MarkInstanceUp(instance.name)
6350
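# Illustrative sketch (hypothetical helper and plain string constants):
# the decision made in LUInstanceReboot.Exec above between an in-place
# reboot and a full stop/start cycle.
def _example_choose_reboot_action(instance_running, reboot_type):
  """Return "reboot", "stop_and_start" or "start_only"."""
  if instance_running and reboot_type in ("soft", "hard"):
    # soft/hard reboots are handled by the hypervisor on the primary node
    return "reboot"
  elif instance_running:
    # a full reboot means shutdown, disk deactivation, then a fresh start
    return "stop_and_start"
  else:
    # a stopped instance is simply started
    return "start_only"
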

    
6351

    
6352
class LUInstanceShutdown(LogicalUnit):
6353
  """Shutdown an instance.
6354

6355
  """
6356
  HPATH = "instance-stop"
6357
  HTYPE = constants.HTYPE_INSTANCE
6358
  REQ_BGL = False
6359

    
6360
  def ExpandNames(self):
6361
    self._ExpandAndLockInstance()
6362

    
6363
  def BuildHooksEnv(self):
6364
    """Build hooks env.
6365

6366
    This runs on master, primary and secondary nodes of the instance.
6367

6368
    """
6369
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6370
    env["TIMEOUT"] = self.op.timeout
6371
    return env
6372

    
6373
  def BuildHooksNodes(self):
6374
    """Build hooks nodes.
6375

6376
    """
6377
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6378
    return (nl, nl)
6379

    
6380
  def CheckPrereq(self):
6381
    """Check prerequisites.
6382

6383
    This checks that the instance is in the cluster.
6384

6385
    """
6386
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6387
    assert self.instance is not None, \
6388
      "Cannot retrieve locked instance %s" % self.op.instance_name
6389

    
6390
    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6391

    
6392
    self.primary_offline = \
6393
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6394

    
6395
    if self.primary_offline and self.op.ignore_offline_nodes:
6396
      self.proc.LogWarning("Ignoring offline primary node")
6397
    else:
6398
      _CheckNodeOnline(self, self.instance.primary_node)
6399

    
6400
  def Exec(self, feedback_fn):
6401
    """Shutdown the instance.
6402

6403
    """
6404
    instance = self.instance
6405
    node_current = instance.primary_node
6406
    timeout = self.op.timeout
6407

    
6408
    if not self.op.no_remember:
6409
      self.cfg.MarkInstanceDown(instance.name)
6410

    
6411
    if self.primary_offline:
6412
      assert self.op.ignore_offline_nodes
6413
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6414
    else:
6415
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6416
      msg = result.fail_msg
6417
      if msg:
6418
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6419

    
6420
      _ShutdownInstanceDisks(self, instance)
6421

    
6422

    
6423
class LUInstanceReinstall(LogicalUnit):
6424
  """Reinstall an instance.
6425

6426
  """
6427
  HPATH = "instance-reinstall"
6428
  HTYPE = constants.HTYPE_INSTANCE
6429
  REQ_BGL = False
6430

    
6431
  def ExpandNames(self):
6432
    self._ExpandAndLockInstance()
6433

    
6434
  def BuildHooksEnv(self):
6435
    """Build hooks env.
6436

6437
    This runs on master, primary and secondary nodes of the instance.
6438

6439
    """
6440
    return _BuildInstanceHookEnvByObject(self, self.instance)
6441

    
6442
  def BuildHooksNodes(self):
6443
    """Build hooks nodes.
6444

6445
    """
6446
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6447
    return (nl, nl)
6448

    
6449
  def CheckPrereq(self):
6450
    """Check prerequisites.
6451

6452
    This checks that the instance is in the cluster and is not running.
6453

6454
    """
6455
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6456
    assert instance is not None, \
6457
      "Cannot retrieve locked instance %s" % self.op.instance_name
6458
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6459
                     " offline, cannot reinstall")
6460
    for node in instance.secondary_nodes:
6461
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6462
                       " cannot reinstall")
6463

    
6464
    if instance.disk_template == constants.DT_DISKLESS:
6465
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6466
                                 self.op.instance_name,
6467
                                 errors.ECODE_INVAL)
6468
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6469

    
6470
    if self.op.os_type is not None:
6471
      # OS verification
6472
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6473
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6474
      instance_os = self.op.os_type
6475
    else:
6476
      instance_os = instance.os
6477

    
6478
    nodelist = list(instance.all_nodes)
6479

    
6480
    if self.op.osparams:
6481
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6482
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6483
      self.os_inst = i_osdict # the new dict (without defaults)
6484
    else:
6485
      self.os_inst = None
6486

    
6487
    self.instance = instance
6488

    
6489
  def Exec(self, feedback_fn):
6490
    """Reinstall the instance.
6491

6492
    """
6493
    inst = self.instance
6494

    
6495
    if self.op.os_type is not None:
6496
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6497
      inst.os = self.op.os_type
6498
      # Write to configuration
6499
      self.cfg.Update(inst, feedback_fn)
6500

    
6501
    _StartInstanceDisks(self, inst, None)
6502
    try:
6503
      feedback_fn("Running the instance OS create scripts...")
6504
      # FIXME: pass debug option from opcode to backend
6505
      result = self.rpc.call_instance_os_add(inst.primary_node,
6506
                                             (inst, self.os_inst), True,
6507
                                             self.op.debug_level)
6508
      result.Raise("Could not install OS for instance %s on node %s" %
6509
                   (inst.name, inst.primary_node))
6510
    finally:
6511
      _ShutdownInstanceDisks(self, inst)
6512

    
6513

    
6514
class LUInstanceRecreateDisks(LogicalUnit):
6515
  """Recreate an instance's missing disks.
6516

6517
  """
6518
  HPATH = "instance-recreate-disks"
6519
  HTYPE = constants.HTYPE_INSTANCE
6520
  REQ_BGL = False
6521

    
6522
  def CheckArguments(self):
6523
    # normalise the disk list
6524
    self.op.disks = sorted(frozenset(self.op.disks))
6525

    
6526
  def ExpandNames(self):
6527
    self._ExpandAndLockInstance()
6528
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6529
    if self.op.nodes:
6530
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6531
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6532
    else:
6533
      self.needed_locks[locking.LEVEL_NODE] = []
6534

    
6535
  def DeclareLocks(self, level):
6536
    if level == locking.LEVEL_NODE:
6537
      # if we replace the nodes, we only need to lock the old primary,
6538
      # otherwise we need to lock all nodes for disk re-creation
6539
      primary_only = bool(self.op.nodes)
6540
      self._LockInstancesNodes(primary_only=primary_only)
6541
    elif level == locking.LEVEL_NODE_RES:
6542
      # Copy node locks
6543
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6544
        self.needed_locks[locking.LEVEL_NODE][:]
6545

    
6546
  def BuildHooksEnv(self):
6547
    """Build hooks env.
6548

6549
    This runs on master, primary and secondary nodes of the instance.
6550

6551
    """
6552
    return _BuildInstanceHookEnvByObject(self, self.instance)
6553

    
6554
  def BuildHooksNodes(self):
6555
    """Build hooks nodes.
6556

6557
    """
6558
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6559
    return (nl, nl)
6560

    
6561
  def CheckPrereq(self):
6562
    """Check prerequisites.
6563

6564
    This checks that the instance is in the cluster and is not running.
6565

6566
    """
6567
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6568
    assert instance is not None, \
6569
      "Cannot retrieve locked instance %s" % self.op.instance_name
6570
    if self.op.nodes:
6571
      if len(self.op.nodes) != len(instance.all_nodes):
6572
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6573
                                   " %d replacement nodes were specified" %
6574
                                   (instance.name, len(instance.all_nodes),
6575
                                    len(self.op.nodes)),
6576
                                   errors.ECODE_INVAL)
6577
      assert instance.disk_template != constants.DT_DRBD8 or \
6578
          len(self.op.nodes) == 2
6579
      assert instance.disk_template != constants.DT_PLAIN or \
6580
          len(self.op.nodes) == 1
6581
      primary_node = self.op.nodes[0]
6582
    else:
6583
      primary_node = instance.primary_node
6584
    _CheckNodeOnline(self, primary_node)
6585

    
6586
    if instance.disk_template == constants.DT_DISKLESS:
6587
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6588
                                 self.op.instance_name, errors.ECODE_INVAL)
6589
    # if we replace nodes *and* the old primary is offline, we don't
6590
    # check
6591
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6592
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6593
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6594
    if not (self.op.nodes and old_pnode.offline):
6595
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6596
                          msg="cannot recreate disks")
6597

    
6598
    if not self.op.disks:
6599
      self.op.disks = range(len(instance.disks))
6600
    else:
6601
      for idx in self.op.disks:
6602
        if idx >= len(instance.disks):
6603
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6604
                                     errors.ECODE_INVAL)
6605
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6606
      raise errors.OpPrereqError("Can't recreate disks partially and"
6607
                                 " change the nodes at the same time",
6608
                                 errors.ECODE_INVAL)
6609
    self.instance = instance
6610

    
6611
  def Exec(self, feedback_fn):
6612
    """Recreate the disks.
6613

6614
    """
6615
    instance = self.instance
6616

    
6617
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6618
            self.owned_locks(locking.LEVEL_NODE_RES))
6619

    
6620
    to_skip = []
6621
    mods = [] # keeps track of needed logical_id changes
6622

    
6623
    for idx, disk in enumerate(instance.disks):
6624
      if idx not in self.op.disks: # disk idx has not been passed in
6625
        to_skip.append(idx)
6626
        continue
6627
      # update secondaries for disks, if needed
6628
      if self.op.nodes:
6629
        if disk.dev_type == constants.LD_DRBD8:
6630
          # need to update the nodes and minors
6631
          assert len(self.op.nodes) == 2
6632
          assert len(disk.logical_id) == 6 # otherwise disk internals
6633
                                           # have changed
6634
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6635
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6636
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6637
                    new_minors[0], new_minors[1], old_secret)
6638
          assert len(disk.logical_id) == len(new_id)
6639
          mods.append((idx, new_id))
6640

    
6641
    # now that we have passed all asserts above, we can apply the mods
6642
    # in a single run (to avoid partial changes)
6643
    for idx, new_id in mods:
6644
      instance.disks[idx].logical_id = new_id
6645

    
6646
    # change primary node, if needed
6647
    if self.op.nodes:
6648
      instance.primary_node = self.op.nodes[0]
6649
      self.LogWarning("Changing the instance's nodes, you will have to"
6650
                      " remove any disks left on the older nodes manually")
6651

    
6652
    if self.op.nodes:
6653
      self.cfg.Update(instance, feedback_fn)
6654

    
6655
    _CreateDisks(self, instance, to_skip=to_skip)
6656
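# Illustrative sketch (hypothetical helper): the "collect first, apply
# later" pattern used in LUInstanceRecreateDisks.Exec above, where all
# logical_id changes are computed and validated before any of them is
# written, so a failing check cannot leave the list half-modified.
def _example_apply_mods(items, compute_new_value):
  """Compute replacement values for all items, then apply them in one go."""
  mods = []
  for idx, item in enumerate(items):
    new_value = compute_new_value(idx, item)  # may raise; nothing changed yet
    if new_value is not None:
      mods.append((idx, new_value))
  for idx, new_value in mods:
    items[idx] = new_value
  return items
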

    
6657

    
6658
class LUInstanceRename(LogicalUnit):
6659
  """Rename an instance.
6660

6661
  """
6662
  HPATH = "instance-rename"
6663
  HTYPE = constants.HTYPE_INSTANCE
6664

    
6665
  def CheckArguments(self):
6666
    """Check arguments.
6667

6668
    """
6669
    if self.op.ip_check and not self.op.name_check:
6670
      # TODO: make the ip check more flexible and not depend on the name check
6671
      raise errors.OpPrereqError("IP address check requires a name check",
6672
                                 errors.ECODE_INVAL)
6673

    
6674
  def BuildHooksEnv(self):
6675
    """Build hooks env.
6676

6677
    This runs on master, primary and secondary nodes of the instance.
6678

6679
    """
6680
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6681
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6682
    return env
6683

    
6684
  def BuildHooksNodes(self):
6685
    """Build hooks nodes.
6686

6687
    """
6688
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6689
    return (nl, nl)
6690

    
6691
  def CheckPrereq(self):
6692
    """Check prerequisites.
6693

6694
    This checks that the instance is in the cluster and is not running.
6695

6696
    """
6697
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6698
                                                self.op.instance_name)
6699
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6700
    assert instance is not None
6701
    _CheckNodeOnline(self, instance.primary_node)
6702
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6703
                        msg="cannot rename")
6704
    self.instance = instance
6705

    
6706
    new_name = self.op.new_name
6707
    if self.op.name_check:
6708
      hostname = netutils.GetHostname(name=new_name)
6709
      if hostname.name != new_name:
6710
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6711
                     hostname.name)
6712
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6713
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6714
                                    " same as given hostname '%s'") %
6715
                                    (hostname.name, self.op.new_name),
6716
                                    errors.ECODE_INVAL)
6717
      new_name = self.op.new_name = hostname.name
6718
      if (self.op.ip_check and
6719
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6720
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6721
                                   (hostname.ip, new_name),
6722
                                   errors.ECODE_NOTUNIQUE)
6723

    
6724
    instance_list = self.cfg.GetInstanceList()
6725
    if new_name in instance_list and new_name != instance.name:
6726
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6727
                                 new_name, errors.ECODE_EXISTS)
6728

    
6729
  def Exec(self, feedback_fn):
6730
    """Rename the instance.
6731

6732
    """
6733
    inst = self.instance
6734
    old_name = inst.name
6735

    
6736
    rename_file_storage = False
6737
    if (inst.disk_template in constants.DTS_FILEBASED and
6738
        self.op.new_name != inst.name):
6739
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6740
      rename_file_storage = True
6741

    
6742
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6743
    # Change the instance lock. This is definitely safe while we hold the BGL.
6744
    # Otherwise the new lock would have to be added in acquired mode.
6745
    assert self.REQ_BGL
6746
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6747
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6748

    
6749
    # re-read the instance from the configuration after rename
6750
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6751

    
6752
    if rename_file_storage:
6753
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6754
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6755
                                                     old_file_storage_dir,
6756
                                                     new_file_storage_dir)
6757
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6758
                   " (but the instance has been renamed in Ganeti)" %
6759
                   (inst.primary_node, old_file_storage_dir,
6760
                    new_file_storage_dir))
6761

    
6762
    _StartInstanceDisks(self, inst, None)
6763
    try:
6764
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6765
                                                 old_name, self.op.debug_level)
6766
      msg = result.fail_msg
6767
      if msg:
6768
        msg = ("Could not run OS rename script for instance %s on node %s"
6769
               " (but the instance has been renamed in Ganeti): %s" %
6770
               (inst.name, inst.primary_node, msg))
6771
        self.proc.LogWarning(msg)
6772
    finally:
6773
      _ShutdownInstanceDisks(self, inst)
6774

    
6775
    return inst.name
6776
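# Illustrative sketch (hypothetical helper, approximating the
# utils.MatchNameComponent check used in LUInstanceRename.CheckPrereq
# above): a resolved FQDN is accepted if it equals the requested name or
# if the requested name matches its first label.
def _example_name_matches(requested, resolved_fqdn):
  """Return True if resolved_fqdn is an acceptable expansion of requested."""
  if requested == resolved_fqdn:
    return True
  return resolved_fqdn.split(".")[0] == requested

# Example: _example_name_matches("web1", "web1.example.com") is True,
# while _example_name_matches("web1", "web2.example.com") is False.
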

    
6777

    
6778
class LUInstanceRemove(LogicalUnit):
6779
  """Remove an instance.
6780

6781
  """
6782
  HPATH = "instance-remove"
6783
  HTYPE = constants.HTYPE_INSTANCE
6784
  REQ_BGL = False
6785

    
6786
  def ExpandNames(self):
6787
    self._ExpandAndLockInstance()
6788
    self.needed_locks[locking.LEVEL_NODE] = []
6789
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6790
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6791

    
6792
  def DeclareLocks(self, level):
6793
    if level == locking.LEVEL_NODE:
6794
      self._LockInstancesNodes()
6795
    elif level == locking.LEVEL_NODE_RES:
6796
      # Copy node locks
6797
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6798
        self.needed_locks[locking.LEVEL_NODE][:]
6799

    
6800
  def BuildHooksEnv(self):
6801
    """Build hooks env.
6802

6803
    This runs on master, primary and secondary nodes of the instance.
6804

6805
    """
6806
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6807
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6808
    return env
6809

    
6810
  def BuildHooksNodes(self):
6811
    """Build hooks nodes.
6812

6813
    """
6814
    nl = [self.cfg.GetMasterNode()]
6815
    nl_post = list(self.instance.all_nodes) + nl
6816
    return (nl, nl_post)
6817

    
6818
  def CheckPrereq(self):
6819
    """Check prerequisites.
6820

6821
    This checks that the instance is in the cluster.
6822

6823
    """
6824
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6825
    assert self.instance is not None, \
6826
      "Cannot retrieve locked instance %s" % self.op.instance_name
6827

    
6828
  def Exec(self, feedback_fn):
6829
    """Remove the instance.
6830

6831
    """
6832
    instance = self.instance
6833
    logging.info("Shutting down instance %s on node %s",
6834
                 instance.name, instance.primary_node)
6835

    
6836
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6837
                                             self.op.shutdown_timeout)
6838
    msg = result.fail_msg
6839
    if msg:
6840
      if self.op.ignore_failures:
6841
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6842
      else:
6843
        raise errors.OpExecError("Could not shutdown instance %s on"
6844
                                 " node %s: %s" %
6845
                                 (instance.name, instance.primary_node, msg))
6846

    
6847
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6848
            self.owned_locks(locking.LEVEL_NODE_RES))
6849
    assert not (set(instance.all_nodes) -
6850
                self.owned_locks(locking.LEVEL_NODE)), \
6851
      "Not owning correct locks"
6852

    
6853
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6854

    
6855

    
6856
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6857
  """Utility function to remove an instance.
6858

6859
  """
6860
  logging.info("Removing block devices for instance %s", instance.name)
6861

    
6862
  if not _RemoveDisks(lu, instance):
6863
    if not ignore_failures:
6864
      raise errors.OpExecError("Can't remove instance's disks")
6865
    feedback_fn("Warning: can't remove instance's disks")
6866

    
6867
  logging.info("Removing instance %s out of cluster config", instance.name)
6868

    
6869
  lu.cfg.RemoveInstance(instance.name)
6870

    
6871
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6872
    "Instance lock removal conflict"
6873

    
6874
  # Remove lock for the instance
6875
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6876

    
6877

    
6878
class LUInstanceQuery(NoHooksLU):
6879
  """Logical unit for querying instances.
6880

6881
  """
6882
  # pylint: disable=W0142
6883
  REQ_BGL = False
6884

    
6885
  def CheckArguments(self):
6886
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6887
                             self.op.output_fields, self.op.use_locking)
6888

    
6889
  def ExpandNames(self):
6890
    self.iq.ExpandNames(self)
6891

    
6892
  def DeclareLocks(self, level):
6893
    self.iq.DeclareLocks(self, level)
6894

    
6895
  def Exec(self, feedback_fn):
6896
    return self.iq.OldStyleQuery(self)
6897

    
6898

    
6899
class LUInstanceFailover(LogicalUnit):
6900
  """Failover an instance.
6901

6902
  """
6903
  HPATH = "instance-failover"
6904
  HTYPE = constants.HTYPE_INSTANCE
6905
  REQ_BGL = False
6906

    
6907
  def CheckArguments(self):
6908
    """Check the arguments.
6909

6910
    """
6911
    self.iallocator = getattr(self.op, "iallocator", None)
6912
    self.target_node = getattr(self.op, "target_node", None)
6913

    
6914
  def ExpandNames(self):
6915
    self._ExpandAndLockInstance()
6916

    
6917
    if self.op.target_node is not None:
6918
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6919

    
6920
    self.needed_locks[locking.LEVEL_NODE] = []
6921
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6922

    
6923
    ignore_consistency = self.op.ignore_consistency
6924
    shutdown_timeout = self.op.shutdown_timeout
6925
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6926
                                       cleanup=False,
6927
                                       failover=True,
6928
                                       ignore_consistency=ignore_consistency,
6929
                                       shutdown_timeout=shutdown_timeout)
6930
    self.tasklets = [self._migrater]
6931

    
6932
  def DeclareLocks(self, level):
6933
    if level == locking.LEVEL_NODE:
6934
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6935
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6936
        if self.op.target_node is None:
6937
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6938
        else:
6939
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6940
                                                   self.op.target_node]
6941
        del self.recalculate_locks[locking.LEVEL_NODE]
6942
      else:
6943
        self._LockInstancesNodes()
6944

    
6945
  def BuildHooksEnv(self):
6946
    """Build hooks env.
6947

6948
    This runs on master, primary and secondary nodes of the instance.
6949

6950
    """
6951
    instance = self._migrater.instance
6952
    source_node = instance.primary_node
6953
    target_node = self.op.target_node
6954
    env = {
6955
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6956
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6957
      "OLD_PRIMARY": source_node,
6958
      "NEW_PRIMARY": target_node,
6959
      }
6960

    
6961
    if instance.disk_template in constants.DTS_INT_MIRROR:
6962
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6963
      env["NEW_SECONDARY"] = source_node
6964
    else:
6965
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6966

    
6967
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6968

    
6969
    return env
6970

    
6971
  def BuildHooksNodes(self):
6972
    """Build hooks nodes.
6973

6974
    """
6975
    instance = self._migrater.instance
6976
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6977
    return (nl, nl + [instance.primary_node])
6978

    
6979

    
6980
class LUInstanceMigrate(LogicalUnit):
6981
  """Migrate an instance.
6982

6983
  This is migration without shutting down, compared to the failover,
6984
  which is done with shutdown.
6985

6986
  """
6987
  HPATH = "instance-migrate"
6988
  HTYPE = constants.HTYPE_INSTANCE
6989
  REQ_BGL = False
6990

    
6991
  def ExpandNames(self):
6992
    self._ExpandAndLockInstance()
6993

    
6994
    if self.op.target_node is not None:
6995
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6996

    
6997
    self.needed_locks[locking.LEVEL_NODE] = []
6998
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6999

    
7000
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
7001
                                       cleanup=self.op.cleanup,
7002
                                       failover=False,
7003
                                       fallback=self.op.allow_failover)
7004
    self.tasklets = [self._migrater]
7005

    
7006
  def DeclareLocks(self, level):
7007
    if level == locking.LEVEL_NODE:
7008
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7009
      if instance.disk_template in constants.DTS_EXT_MIRROR:
7010
        if self.op.target_node is None:
7011
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7012
        else:
7013
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7014
                                                   self.op.target_node]
7015
        del self.recalculate_locks[locking.LEVEL_NODE]
7016
      else:
7017
        self._LockInstancesNodes()
7018

    
7019
  def BuildHooksEnv(self):
7020
    """Build hooks env.
7021

7022
    This runs on master, primary and secondary nodes of the instance.
7023

7024
    """
7025
    instance = self._migrater.instance
7026
    source_node = instance.primary_node
7027
    target_node = self.op.target_node
7028
    env = _BuildInstanceHookEnvByObject(self, instance)
7029
    env.update({
7030
      "MIGRATE_LIVE": self._migrater.live,
7031
      "MIGRATE_CLEANUP": self.op.cleanup,
7032
      "OLD_PRIMARY": source_node,
7033
      "NEW_PRIMARY": target_node,
7034
      })
7035

    
7036
    if instance.disk_template in constants.DTS_INT_MIRROR:
7037
      env["OLD_SECONDARY"] = target_node
7038
      env["NEW_SECONDARY"] = source_node
7039
    else:
7040
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7041

    
7042
    return env
7043

    
7044
  def BuildHooksNodes(self):
7045
    """Build hooks nodes.
7046

7047
    """
7048
    instance = self._migrater.instance
7049
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7050
    return (nl, nl + [instance.primary_node])
7051

    
7052

    
7053
class LUInstanceMove(LogicalUnit):
7054
  """Move an instance by data-copying.
7055

7056
  """
7057
  HPATH = "instance-move"
7058
  HTYPE = constants.HTYPE_INSTANCE
7059
  REQ_BGL = False
7060

    
7061
  def ExpandNames(self):
7062
    self._ExpandAndLockInstance()
7063
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7064
    self.op.target_node = target_node
7065
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
7066
    self.needed_locks[locking.LEVEL_NODE_RES] = []
7067
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7068

    
7069
  def DeclareLocks(self, level):
7070
    if level == locking.LEVEL_NODE:
7071
      self._LockInstancesNodes(primary_only=True)
7072
    elif level == locking.LEVEL_NODE_RES:
7073
      # Copy node locks
7074
      self.needed_locks[locking.LEVEL_NODE_RES] = \
7075
        self.needed_locks[locking.LEVEL_NODE][:]
7076

    
7077
  def BuildHooksEnv(self):
7078
    """Build hooks env.
7079

7080
    This runs on master, primary and secondary nodes of the instance.
7081

7082
    """
7083
    env = {
7084
      "TARGET_NODE": self.op.target_node,
7085
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7086
      }
7087
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7088
    return env
7089

    
7090
  def BuildHooksNodes(self):
7091
    """Build hooks nodes.
7092

7093
    """
7094
    nl = [
7095
      self.cfg.GetMasterNode(),
7096
      self.instance.primary_node,
7097
      self.op.target_node,
7098
      ]
7099
    return (nl, nl)
7100

    
7101
  def CheckPrereq(self):
7102
    """Check prerequisites.
7103

7104
    This checks that the instance is in the cluster.
7105

7106
    """
7107
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7108
    assert self.instance is not None, \
7109
      "Cannot retrieve locked instance %s" % self.op.instance_name
7110

    
7111
    node = self.cfg.GetNodeInfo(self.op.target_node)
7112
    assert node is not None, \
7113
      "Cannot retrieve locked node %s" % self.op.target_node
7114

    
7115
    self.target_node = target_node = node.name
7116

    
7117
    if target_node == instance.primary_node:
7118
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7119
                                 (instance.name, target_node),
7120
                                 errors.ECODE_STATE)
7121

    
7122
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7123

    
7124
    for idx, dsk in enumerate(instance.disks):
7125
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7126
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7127
                                   " cannot copy" % idx, errors.ECODE_STATE)
7128

    
7129
    _CheckNodeOnline(self, target_node)
7130
    _CheckNodeNotDrained(self, target_node)
7131
    _CheckNodeVmCapable(self, target_node)
7132

    
7133
    if instance.admin_state == constants.ADMINST_UP:
7134
      # check memory requirements on the target node
7135
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7136
                           instance.name, bep[constants.BE_MAXMEM],
7137
                           instance.hypervisor)
7138
    else:
7139
      self.LogInfo("Not checking memory on the secondary node as"
7140
                   " instance will not be started")
7141

    
7142
    # check bridge existence
7143
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7144

    
7145
  def Exec(self, feedback_fn):
7146
    """Move an instance.
7147

7148
    The move is done by shutting it down on its present node, copying
7149
    the data over (slow) and starting it on the new node.
7150

7151
    """
7152
    instance = self.instance
7153

    
7154
    source_node = instance.primary_node
7155
    target_node = self.target_node
7156

    
7157
    self.LogInfo("Shutting down instance %s on source node %s",
7158
                 instance.name, source_node)
7159

    
7160
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7161
            self.owned_locks(locking.LEVEL_NODE_RES))
7162

    
7163
    result = self.rpc.call_instance_shutdown(source_node, instance,
7164
                                             self.op.shutdown_timeout)
7165
    msg = result.fail_msg
7166
    if msg:
7167
      if self.op.ignore_consistency:
7168
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7169
                             " Proceeding anyway. Please make sure node"
7170
                             " %s is down. Error details: %s",
7171
                             instance.name, source_node, source_node, msg)
7172
      else:
7173
        raise errors.OpExecError("Could not shutdown instance %s on"
7174
                                 " node %s: %s" %
7175
                                 (instance.name, source_node, msg))
7176

    
7177
    # create the target disks
7178
    try:
7179
      _CreateDisks(self, instance, target_node=target_node)
7180
    except errors.OpExecError:
7181
      self.LogWarning("Device creation failed, reverting...")
7182
      try:
7183
        _RemoveDisks(self, instance, target_node=target_node)
7184
      finally:
7185
        self.cfg.ReleaseDRBDMinors(instance.name)
7186
        raise
7187

    
7188
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7189

    
7190
    errs = []
7191
    # activate, get path, copy the data over
7192
    for idx, disk in enumerate(instance.disks):
7193
      self.LogInfo("Copying data for disk %d", idx)
7194
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7195
                                               instance.name, True, idx)
7196
      if result.fail_msg:
7197
        self.LogWarning("Can't assemble newly created disk %d: %s",
7198
                        idx, result.fail_msg)
7199
        errs.append(result.fail_msg)
7200
        break
7201
      dev_path = result.payload
7202
      result = self.rpc.call_blockdev_export(source_node, disk,
7203
                                             target_node, dev_path,
7204
                                             cluster_name)
7205
      if result.fail_msg:
7206
        self.LogWarning("Can't copy data over for disk %d: %s",
7207
                        idx, result.fail_msg)
7208
        errs.append(result.fail_msg)
7209
        break
7210

    
7211
    if errs:
7212
      self.LogWarning("Some disks failed to copy, aborting")
7213
      try:
7214
        _RemoveDisks(self, instance, target_node=target_node)
7215
      finally:
7216
        self.cfg.ReleaseDRBDMinors(instance.name)
7217
        raise errors.OpExecError("Errors during disk copy: %s" %
7218
                                 (",".join(errs),))
7219

    
7220
    instance.primary_node = target_node
7221
    self.cfg.Update(instance, feedback_fn)
7222

    
7223
    self.LogInfo("Removing the disks on the original node")
7224
    _RemoveDisks(self, instance, target_node=source_node)
7225

    
7226
    # Only start the instance if it's marked as up
7227
    if instance.admin_state == constants.ADMINST_UP:
7228
      self.LogInfo("Starting instance %s on node %s",
7229
                   instance.name, target_node)
7230

    
7231
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7232
                                           ignore_secondaries=True)
7233
      if not disks_ok:
7234
        _ShutdownInstanceDisks(self, instance)
7235
        raise errors.OpExecError("Can't activate the instance's disks")
7236

    
7237
      result = self.rpc.call_instance_start(target_node,
7238
                                            (instance, None, None), False)
7239
      msg = result.fail_msg
7240
      if msg:
7241
        _ShutdownInstanceDisks(self, instance)
7242
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7243
                                 (instance.name, target_node, msg))
7244

    
7245

    
7246
class LUNodeMigrate(LogicalUnit):
7247
  """Migrate all instances from a node.
7248

7249
  """
7250
  HPATH = "node-migrate"
7251
  HTYPE = constants.HTYPE_NODE
7252
  REQ_BGL = False
7253

    
7254
  def CheckArguments(self):
7255
    pass
7256

    
7257
  def ExpandNames(self):
7258
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7259

    
7260
    self.share_locks = _ShareAll()
7261
    self.needed_locks = {
7262
      locking.LEVEL_NODE: [self.op.node_name],
7263
      }
7264

    
7265
  def BuildHooksEnv(self):
7266
    """Build hooks env.
7267

7268
    This runs on the master, the primary and all the secondaries.
7269

7270
    """
7271
    return {
7272
      "NODE_NAME": self.op.node_name,
7273
      }
7274

    
7275
  def BuildHooksNodes(self):
7276
    """Build hooks nodes.
7277

7278
    """
7279
    nl = [self.cfg.GetMasterNode()]
7280
    return (nl, nl)
7281

    
7282
  def CheckPrereq(self):
7283
    pass
7284

    
7285
  def Exec(self, feedback_fn):
7286
    # Prepare jobs for migration instances
7287
    jobs = [
7288
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7289
                                 mode=self.op.mode,
7290
                                 live=self.op.live,
7291
                                 iallocator=self.op.iallocator,
7292
                                 target_node=self.op.target_node)]
7293
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7294
      ]
7295

    
7296
    # TODO: Run iallocator in this opcode and pass correct placement options to
7297
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7298
    # running the iallocator and the actual migration, a good consistency model
7299
    # will have to be found.
7300

    
7301
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7302
            frozenset([self.op.node_name]))
7303

    
7304
    return ResultWithJobs(jobs)
7305

    
7306

    
7307
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown

  """

  # Constants
7333
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7334
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7335

    
7336
  def __init__(self, lu, instance_name, cleanup=False,
7337
               failover=False, fallback=False,
7338
               ignore_consistency=False,
7339
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7340
    """Initializes this class.
7341

7342
    """
7343
    Tasklet.__init__(self, lu)
7344

    
7345
    # Parameters
7346
    self.instance_name = instance_name
7347
    self.cleanup = cleanup
7348
    self.live = False # will be overridden later
7349
    self.failover = failover
7350
    self.fallback = fallback
7351
    self.ignore_consistency = ignore_consistency
7352
    self.shutdown_timeout = shutdown_timeout
7353

    
7354
  def CheckPrereq(self):
7355
    """Check prerequisites.
7356

7357
    This checks that the instance is in the cluster.
7358

7359
    """
7360
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7361
    instance = self.cfg.GetInstanceInfo(instance_name)
7362
    assert instance is not None
7363
    self.instance = instance
7364

    
7365
    if (not self.cleanup and
7366
        not instance.admin_state == constants.ADMINST_UP and
7367
        not self.failover and self.fallback):
7368
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7369
                      " switching to failover")
7370
      self.failover = True
7371

    
7372
    if instance.disk_template not in constants.DTS_MIRRORED:
7373
      if self.failover:
7374
        text = "failovers"
7375
      else:
7376
        text = "migrations"
7377
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7378
                                 " %s" % (instance.disk_template, text),
7379
                                 errors.ECODE_STATE)
7380

    
7381
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7382
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7383

    
7384
      if self.lu.op.iallocator:
7385
        self._RunAllocator()
7386
      else:
7387
        # We set set self.target_node as it is required by
7388
        # BuildHooksEnv
7389
        self.target_node = self.lu.op.target_node
7390

    
7391
      # self.target_node is already populated, either directly or by the
7392
      # iallocator run
7393
      target_node = self.target_node
7394
      if self.target_node == instance.primary_node:
7395
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node),
                                   errors.ECODE_INVAL)
7398

    
7399
      if len(self.lu.tasklets) == 1:
7400
        # It is safe to release locks only when we're the only tasklet
7401
        # in the LU
7402
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7403
                      keep=[instance.primary_node, self.target_node])
7404

    
7405
    else:
7406
      secondary_nodes = instance.secondary_nodes
7407
      if not secondary_nodes:
7408
        raise errors.ConfigurationError("No secondary node but using"
7409
                                        " %s disk template" %
7410
                                        instance.disk_template)
7411
      target_node = secondary_nodes[0]
7412
      if self.lu.op.iallocator or (self.lu.op.target_node and
7413
                                   self.lu.op.target_node != target_node):
7414
        if self.failover:
7415
          text = "failed over"
7416
        else:
7417
          text = "migrated"
7418
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7419
                                   " be %s to arbitrary nodes"
7420
                                   " (neither an iallocator nor a target"
7421
                                   " node can be passed)" %
7422
                                   (instance.disk_template, text),
7423
                                   errors.ECODE_INVAL)
7424

    
7425
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7426

    
7427
    # check memory requirements on the secondary node
7428
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7429
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7430
                           instance.name, i_be[constants.BE_MAXMEM],
7431
                           instance.hypervisor)
7432
    else:
7433
      self.lu.LogInfo("Not checking memory on the secondary node as"
7434
                      " instance will not be started")
7435

    
7436
    # check bridge existence
7437
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7438

    
7439
    if not self.cleanup:
7440
      _CheckNodeNotDrained(self.lu, target_node)
7441
      if not self.failover:
7442
        result = self.rpc.call_instance_migratable(instance.primary_node,
7443
                                                   instance)
7444
        if result.fail_msg and self.fallback:
7445
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7446
                          " failover")
7447
          self.failover = True
7448
        else:
7449
          result.Raise("Can't migrate, please use failover",
7450
                       prereq=True, ecode=errors.ECODE_STATE)
7451

    
7452
    assert not (self.failover and self.cleanup)
7453

    
7454
    if not self.failover:
7455
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7456
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7457
                                   " parameters are accepted",
7458
                                   errors.ECODE_INVAL)
7459
      if self.lu.op.live is not None:
7460
        if self.lu.op.live:
7461
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7462
        else:
7463
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7464
        # reset the 'live' parameter to None so that repeated
7465
        # invocations of CheckPrereq do not raise an exception
7466
        self.lu.op.live = None
7467
      elif self.lu.op.mode is None:
7468
        # read the default value from the hypervisor
7469
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7470
                                                skip_globals=False)
7471
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7472

    
7473
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7474
    else:
7475
      # Failover is never live
7476
      self.live = False
7477

    
7478
  def _RunAllocator(self):
7479
    """Run the allocator based on input opcode.
7480

7481
    """
7482
    ial = IAllocator(self.cfg, self.rpc,
7483
                     mode=constants.IALLOCATOR_MODE_RELOC,
7484
                     name=self.instance_name,
7485
                     # TODO See why hail breaks with a single node below
7486
                     relocate_from=[self.instance.primary_node,
7487
                                    self.instance.primary_node],
7488
                     )
7489

    
7490
    ial.Run(self.lu.op.iallocator)
7491

    
7492
    if not ial.success:
7493
      raise errors.OpPrereqError("Can't compute nodes using"
7494
                                 " iallocator '%s': %s" %
7495
                                 (self.lu.op.iallocator, ial.info),
7496
                                 errors.ECODE_NORES)
7497
    if len(ial.result) != ial.required_nodes:
7498
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7499
                                 " of nodes (%s), required %s" %
7500
                                 (self.lu.op.iallocator, len(ial.result),
7501
                                  ial.required_nodes), errors.ECODE_FAULT)
7502
    self.target_node = ial.result[0]
7503
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))
7506

    
7507
  def _WaitUntilSync(self):
7508
    """Poll with custom rpc for disk sync.
7509

7510
    This uses our own step-based rpc call.
7511

7512
    """
7513
    self.feedback_fn("* wait until resync is done")
7514
    all_done = False
7515
    while not all_done:
7516
      all_done = True
7517
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7518
                                            self.nodes_ip,
7519
                                            self.instance.disks)
7520
      min_percent = 100
7521
      for node, nres in result.items():
7522
        nres.Raise("Cannot resync disks on node %s" % node)
7523
        node_done, node_percent = nres.payload
7524
        all_done = all_done and node_done
7525
        if node_percent is not None:
7526
          min_percent = min(min_percent, node_percent)
7527
      if not all_done:
7528
        if min_percent < 100:
7529
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7530
        time.sleep(2)
7531

    
7532
  def _EnsureSecondary(self, node):
7533
    """Demote a node to secondary.
7534

7535
    """
7536
    self.feedback_fn("* switching node %s to secondary mode" % node)
7537

    
7538
    for dev in self.instance.disks:
7539
      self.cfg.SetDiskID(dev, node)
7540

    
7541
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7542
                                          self.instance.disks)
7543
    result.Raise("Cannot change disk to secondary on node %s" % node)
7544

    
7545
  def _GoStandalone(self):
7546
    """Disconnect from the network.
7547

7548
    """
7549
    self.feedback_fn("* changing into standalone mode")
7550
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7551
                                               self.instance.disks)
7552
    for node, nres in result.items():
7553
      nres.Raise("Cannot disconnect disks on node %s" % node)
7554

    
7555
  def _GoReconnect(self, multimaster):
7556
    """Reconnect to the network.
7557

7558
    """
7559
    if multimaster:
7560
      msg = "dual-master"
7561
    else:
7562
      msg = "single-master"
7563
    self.feedback_fn("* changing disks into %s mode" % msg)
7564
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7565
                                           self.instance.disks,
7566
                                           self.instance.name, multimaster)
7567
    for node, nres in result.items():
7568
      nres.Raise("Cannot change disks config on node %s" % node)
7569

    
7570
  def _ExecCleanup(self):
7571
    """Try to cleanup after a failed migration.
7572

7573
    The cleanup is done by:
7574
      - check that the instance is running only on one node
7575
        (and update the config if needed)
7576
      - change disks on its secondary node to secondary
7577
      - wait until disks are fully synchronized
7578
      - disconnect from the network
7579
      - change disks into single-master mode
7580
      - wait again until disks are fully synchronized
7581

7582
    """
7583
    instance = self.instance
7584
    target_node = self.target_node
7585
    source_node = self.source_node
7586

    
7587
    # check running on only one node
7588
    self.feedback_fn("* checking where the instance actually runs"
7589
                     " (if this hangs, the hypervisor might be in"
7590
                     " a bad state)")
7591
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7592
    for node, result in ins_l.items():
7593
      result.Raise("Can't contact node %s" % node)
7594

    
7595
    runningon_source = instance.name in ins_l[source_node].payload
7596
    runningon_target = instance.name in ins_l[target_node].payload
7597

    
7598
    if runningon_source and runningon_target:
7599
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7600
                               " or the hypervisor is confused; you will have"
7601
                               " to ensure manually that it runs only on one"
7602
                               " and restart this operation")
7603

    
7604
    if not (runningon_source or runningon_target):
7605
      raise errors.OpExecError("Instance does not seem to be running at all;"
7606
                               " in this case it's safer to repair by"
7607
                               " running 'gnt-instance stop' to ensure disk"
7608
                               " shutdown, and then restarting it")
7609

    
7610
    if runningon_target:
7611
      # the migration has actually succeeded, we need to update the config
7612
      self.feedback_fn("* instance running on secondary node (%s),"
7613
                       " updating config" % target_node)
7614
      instance.primary_node = target_node
7615
      self.cfg.Update(instance, self.feedback_fn)
7616
      demoted_node = source_node
7617
    else:
7618
      self.feedback_fn("* instance confirmed to be running on its"
7619
                       " primary node (%s)" % source_node)
7620
      demoted_node = target_node
7621

    
7622
    if instance.disk_template in constants.DTS_INT_MIRROR:
7623
      self._EnsureSecondary(demoted_node)
7624
      try:
7625
        self._WaitUntilSync()
7626
      except errors.OpExecError:
7627
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
7629
        pass
7630
      self._GoStandalone()
7631
      self._GoReconnect(False)
7632
      self._WaitUntilSync()
7633

    
7634
    self.feedback_fn("* done")
7635

    
7636
  def _RevertDiskStatus(self):
7637
    """Try to revert the disk status after a failed migration.
7638

7639
    """
7640
    target_node = self.target_node
7641
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7642
      return
7643

    
7644
    try:
7645
      self._EnsureSecondary(target_node)
7646
      self._GoStandalone()
7647
      self._GoReconnect(False)
7648
      self._WaitUntilSync()
7649
    except errors.OpExecError, err:
7650
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7651
                         " please try to recover the instance manually;"
7652
                         " error '%s'" % str(err))
7653

    
7654
  def _AbortMigration(self):
7655
    """Call the hypervisor code to abort a started migration.
7656

7657
    """
7658
    instance = self.instance
7659
    target_node = self.target_node
7660
    source_node = self.source_node
7661
    migration_info = self.migration_info
7662

    
7663
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7664
                                                                 instance,
7665
                                                                 migration_info,
7666
                                                                 False)
7667
    abort_msg = abort_result.fail_msg
7668
    if abort_msg:
7669
      logging.error("Aborting migration failed on target node %s: %s",
7670
                    target_node, abort_msg)
7671
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7673

    
7674
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7675
        instance, False, self.live)
7676
    abort_msg = abort_result.fail_msg
7677
    if abort_msg:
7678
      logging.error("Aborting migration failed on source node %s: %s",
7679
                    source_node, abort_msg)
7680

    
7681
  def _ExecMigration(self):
7682
    """Migrate an instance.
7683

7684
    The migrate is done by:
7685
      - change the disks into dual-master mode
7686
      - wait until disks are fully synchronized again
7687
      - migrate the instance
7688
      - change disks on the new secondary node (the old primary) to secondary
7689
      - wait until disks are fully synchronized
7690
      - change disks into single-master mode
7691

7692
    """
7693
    instance = self.instance
7694
    target_node = self.target_node
7695
    source_node = self.source_node
7696

    
7697
    # Check for hypervisor version mismatch and warn the user.
7698
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7699
                                       None, [self.instance.hypervisor])
7700
    for ninfo in nodeinfo.values():
7701
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
7702
                  ninfo.node)
7703
    (_, _, (src_info, )) = nodeinfo[source_node].payload
7704
    (_, _, (dst_info, )) = nodeinfo[target_node].payload
7705

    
7706
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7707
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7708
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7709
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7710
      if src_version != dst_version:
7711
        self.feedback_fn("* warning: hypervisor version mismatch between"
7712
                         " source (%s) and target (%s) node" %
7713
                         (src_version, dst_version))
7714

    
7715
    self.feedback_fn("* checking disk consistency between source and target")
7716
    for dev in instance.disks:
7717
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7718
        raise errors.OpExecError("Disk %s is degraded or not fully"
7719
                                 " synchronized on target node,"
7720
                                 " aborting migration" % dev.iv_name)
7721

    
7722
    # First get the migration information from the remote node
7723
    result = self.rpc.call_migration_info(source_node, instance)
7724
    msg = result.fail_msg
7725
    if msg:
7726
      log_err = ("Failed fetching source migration information from %s: %s" %
7727
                 (source_node, msg))
7728
      logging.error(log_err)
7729
      raise errors.OpExecError(log_err)
7730

    
7731
    self.migration_info = migration_info = result.payload
7732

    
7733
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7734
      # Then switch the disks to master/master mode
7735
      self._EnsureSecondary(target_node)
7736
      self._GoStandalone()
7737
      self._GoReconnect(True)
7738
      self._WaitUntilSync()
7739

    
7740
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7741
    result = self.rpc.call_accept_instance(target_node,
7742
                                           instance,
7743
                                           migration_info,
7744
                                           self.nodes_ip[target_node])
7745

    
7746
    msg = result.fail_msg
7747
    if msg:
7748
      logging.error("Instance pre-migration failed, trying to revert"
7749
                    " disk status: %s", msg)
7750
      self.feedback_fn("Pre-migration failed, aborting")
7751
      self._AbortMigration()
7752
      self._RevertDiskStatus()
7753
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7754
                               (instance.name, msg))
7755

    
7756
    self.feedback_fn("* migrating instance to %s" % target_node)
7757
    result = self.rpc.call_instance_migrate(source_node, instance,
7758
                                            self.nodes_ip[target_node],
7759
                                            self.live)
7760
    msg = result.fail_msg
7761
    if msg:
7762
      logging.error("Instance migration failed, trying to revert"
7763
                    " disk status: %s", msg)
7764
      self.feedback_fn("Migration failed, aborting")
7765
      self._AbortMigration()
7766
      self._RevertDiskStatus()
7767
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7768
                               (instance.name, msg))
7769

    
7770
    self.feedback_fn("* starting memory transfer")
7771
    last_feedback = time.time()
7772
    while True:
7773
      result = self.rpc.call_instance_get_migration_status(source_node,
7774
                                                           instance)
7775
      msg = result.fail_msg
7776
      ms = result.payload   # MigrationStatus instance
7777
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7778
        logging.error("Instance migration failed, trying to revert"
7779
                      " disk status: %s", msg)
7780
        self.feedback_fn("Migration failed, aborting")
7781
        self._AbortMigration()
7782
        self._RevertDiskStatus()
7783
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7784
                                 (instance.name, msg))
7785

    
7786
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7787
        self.feedback_fn("* memory transfer complete")
7788
        break
7789

    
7790
      if (utils.TimeoutExpired(last_feedback,
7791
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7792
          ms.transferred_ram is not None):
7793
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7794
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7795
        last_feedback = time.time()
7796

    
7797
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7798

    
7799
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7800
                                                           instance,
7801
                                                           True,
7802
                                                           self.live)
7803
    msg = result.fail_msg
7804
    if msg:
7805
      logging.error("Instance migration succeeded, but finalization failed"
7806
                    " on the source node: %s", msg)
7807
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7808
                               msg)
7809

    
7810
    instance.primary_node = target_node
7811

    
7812
    # distribute new instance config to the other nodes
7813
    self.cfg.Update(instance, self.feedback_fn)
7814

    
7815
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7816
                                                           instance,
7817
                                                           migration_info,
7818
                                                           True)
7819
    msg = result.fail_msg
7820
    if msg:
7821
      logging.error("Instance migration succeeded, but finalization failed"
7822
                    " on the target node: %s", msg)
7823
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7824
                               msg)
7825

    
7826
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7827
      self._EnsureSecondary(source_node)
7828
      self._WaitUntilSync()
7829
      self._GoStandalone()
7830
      self._GoReconnect(False)
7831
      self._WaitUntilSync()
7832

    
7833
    self.feedback_fn("* done")
7834

    
7835
  def _ExecFailover(self):
7836
    """Failover an instance.
7837

7838
    The failover is done by shutting it down on its present node and
7839
    starting it on the secondary.
7840

7841
    """
7842
    instance = self.instance
7843
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7844

    
7845
    source_node = instance.primary_node
7846
    target_node = self.target_node
7847

    
7848
    if instance.admin_state == constants.ADMINST_UP:
7849
      self.feedback_fn("* checking disk consistency between source and target")
7850
      for dev in instance.disks:
7851
        # for drbd, these are drbd over lvm
7852
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7853
          if primary_node.offline:
7854
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7855
                             " target node %s" %
7856
                             (primary_node.name, dev.iv_name, target_node))
7857
          elif not self.ignore_consistency:
7858
            raise errors.OpExecError("Disk %s is degraded on target node,"
7859
                                     " aborting failover" % dev.iv_name)
7860
    else:
7861
      self.feedback_fn("* not checking disk consistency as instance is not"
7862
                       " running")
7863

    
7864
    self.feedback_fn("* shutting down instance on source node")
7865
    logging.info("Shutting down instance %s on node %s",
7866
                 instance.name, source_node)
7867

    
7868
    result = self.rpc.call_instance_shutdown(source_node, instance,
7869
                                             self.shutdown_timeout)
7870
    msg = result.fail_msg
7871
    if msg:
7872
      if self.ignore_consistency or primary_node.offline:
7873
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7874
                           " proceeding anyway; please make sure node"
7875
                           " %s is down; error details: %s",
7876
                           instance.name, source_node, source_node, msg)
7877
      else:
7878
        raise errors.OpExecError("Could not shutdown instance %s on"
7879
                                 " node %s: %s" %
7880
                                 (instance.name, source_node, msg))
7881

    
7882
    self.feedback_fn("* deactivating the instance's disks on source node")
7883
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7884
      raise errors.OpExecError("Can't shut down the instance's disks")
7885

    
7886
    instance.primary_node = target_node
7887
    # distribute new instance config to the other nodes
7888
    self.cfg.Update(instance, self.feedback_fn)
7889

    
7890
    # Only start the instance if it's marked as up
7891
    if instance.admin_state == constants.ADMINST_UP:
7892
      self.feedback_fn("* activating the instance's disks on target node %s" %
7893
                       target_node)
7894
      logging.info("Starting instance %s on node %s",
7895
                   instance.name, target_node)
7896

    
7897
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7898
                                           ignore_secondaries=True)
7899
      if not disks_ok:
7900
        _ShutdownInstanceDisks(self.lu, instance)
7901
        raise errors.OpExecError("Can't activate the instance's disks")
7902

    
7903
      self.feedback_fn("* starting the instance on the target node %s" %
7904
                       target_node)
7905
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7906
                                            False)
7907
      msg = result.fail_msg
7908
      if msg:
7909
        _ShutdownInstanceDisks(self.lu, instance)
7910
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7911
                                 (instance.name, target_node, msg))
7912

    
7913
  def Exec(self, feedback_fn):
7914
    """Perform the migration.
7915

7916
    """
7917
    self.feedback_fn = feedback_fn
7918
    self.source_node = self.instance.primary_node
7919

    
7920
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7921
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7922
      self.target_node = self.instance.secondary_nodes[0]
7923
      # Otherwise self.target_node has been populated either
7924
      # directly, or through an iallocator.
7925

    
7926
    self.all_nodes = [self.source_node, self.target_node]
7927
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7928
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7929

    
7930
    if self.failover:
7931
      feedback_fn("Failover instance %s" % self.instance.name)
7932
      self._ExecFailover()
7933
    else:
7934
      feedback_fn("Migrating instance %s" % self.instance.name)
7935

    
7936
      if self.cleanup:
7937
        return self._ExecCleanup()
7938
      else:
7939
        return self._ExecMigration()
7940

    
7941

    
7942
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device whose
      CreateOnSecondary() method returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
7970
  if device.CreateOnSecondary():
7971
    force_create = True
7972

    
7973
  if device.children:
7974
    for child in device.children:
7975
      _CreateBlockDev(lu, node, instance, child, force_create,
7976
                      info, force_open)
7977

    
7978
  if not force_create:
7979
    return
7980

    
7981
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7982

    
7983

    
7984
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7985
  """Create a single block device on a given node.
7986

7987
  This will not recurse over children of the device, so they must be
7988
  created in advance.
7989

7990
  @param lu: the lu on whose behalf we execute
7991
  @param node: the node on which to create the device
7992
  @type instance: L{objects.Instance}
7993
  @param instance: the instance which owns the device
7994
  @type device: L{objects.Disk}
7995
  @param device: the device to create
7996
  @param info: the extra 'metadata' we should attach to the device
7997
      (this will be represented as a LVM tag)
7998
  @type force_open: boolean
7999
  @param force_open: this parameter will be passes to the
8000
      L{backend.BlockdevCreate} function where it specifies
8001
      whether we run on primary or not, and it affects both
8002
      the child assembly and the device own Open() execution
8003

8004
  """
8005
  lu.cfg.SetDiskID(device, node)
8006
  result = lu.rpc.call_blockdev_create(node, device, device.size,
8007
                                       instance.name, force_open, info)
8008
  result.Raise("Can't create block device %s on"
8009
               " node %s for instance %s" % (device, node, instance.name))
8010
  if device.physical_id is None:
8011
    device.physical_id = result.payload
8012

    
8013

    
8014
def _GenerateUniqueNames(lu, exts):
8015
  """Generate a suitable LV name.
8016

8017
  This will generate a logical volume name for the given instance.
8018

8019
  """
8020
  results = []
8021
  for val in exts:
8022
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8023
    results.append("%s%s" % (new_id, val))
8024
  return results
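
# Note: for exts such as [".disk0", ".disk1"], _GenerateUniqueNames returns
# names of the form "<unique-id>.disk0", "<unique-id>.disk1", where the
# unique ID is obtained from the cluster configuration for the current
# execution context.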
8025

    
8026

    
8027
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8028
                         iv_name, p_minor, s_minor):
8029
  """Generate a drbd8 device complete with its children.
8030

8031
  """
8032
  assert len(vgnames) == len(names) == 2
8033
  port = lu.cfg.AllocatePort()
8034
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8035
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8036
                          logical_id=(vgnames[0], names[0]))
8037
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8038
                          logical_id=(vgnames[1], names[1]))
8039
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8040
                          logical_id=(primary, secondary, port,
8041
                                      p_minor, s_minor,
8042
                                      shared_secret),
8043
                          children=[dev_data, dev_meta],
8044
                          iv_name=iv_name)
8045
  return drbd_dev
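
# Note: each DRBD8 disk generated above is backed by two LV children, a data
# volume of the requested size and a metadata volume of DRBD_META_SIZE, and
# its logical_id carries (primary, secondary, port, p_minor, s_minor,
# shared_secret).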
8046

    
8047

    
8048
def _GenerateDiskTemplate(lu, template_name,
8049
                          instance_name, primary_node,
8050
                          secondary_nodes, disk_info,
8051
                          file_storage_dir, file_driver,
8052
                          base_index, feedback_fn):
8053
  """Generate the entire disk layout for a given template type.
8054

8055
  """
8056
  #TODO: compute space requirements
8057

    
8058
  vgname = lu.cfg.GetVGName()
8059
  disk_count = len(disk_info)
8060
  disks = []
8061
  if template_name == constants.DT_DISKLESS:
8062
    pass
8063
  elif template_name == constants.DT_PLAIN:
8064
    if len(secondary_nodes) != 0:
8065
      raise errors.ProgrammerError("Wrong template configuration")
8066

    
8067
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8068
                                      for i in range(disk_count)])
8069
    for idx, disk in enumerate(disk_info):
8070
      disk_index = idx + base_index
8071
      vg = disk.get(constants.IDISK_VG, vgname)
8072
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8073
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
8074
                              size=disk[constants.IDISK_SIZE],
8075
                              logical_id=(vg, names[idx]),
8076
                              iv_name="disk/%d" % disk_index,
8077
                              mode=disk[constants.IDISK_MODE])
8078
      disks.append(disk_dev)
8079
  elif template_name == constants.DT_DRBD8:
8080
    if len(secondary_nodes) != 1:
8081
      raise errors.ProgrammerError("Wrong template configuration")
8082
    remote_node = secondary_nodes[0]
8083
    minors = lu.cfg.AllocateDRBDMinor(
8084
      [primary_node, remote_node] * len(disk_info), instance_name)
8085

    
8086
    names = []
8087
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8088
                                               for i in range(disk_count)]):
8089
      names.append(lv_prefix + "_data")
8090
      names.append(lv_prefix + "_meta")
8091
    for idx, disk in enumerate(disk_info):
8092
      disk_index = idx + base_index
8093
      data_vg = disk.get(constants.IDISK_VG, vgname)
8094
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8095
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8096
                                      disk[constants.IDISK_SIZE],
8097
                                      [data_vg, meta_vg],
8098
                                      names[idx * 2:idx * 2 + 2],
8099
                                      "disk/%d" % disk_index,
8100
                                      minors[idx * 2], minors[idx * 2 + 1])
8101
      disk_dev.mode = disk[constants.IDISK_MODE]
8102
      disks.append(disk_dev)
8103
  elif template_name == constants.DT_FILE:
8104
    if len(secondary_nodes) != 0:
8105
      raise errors.ProgrammerError("Wrong template configuration")
8106

    
8107
    opcodes.RequireFileStorage()
8108

    
8109
    for idx, disk in enumerate(disk_info):
8110
      disk_index = idx + base_index
8111
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8112
                              size=disk[constants.IDISK_SIZE],
8113
                              iv_name="disk/%d" % disk_index,
8114
                              logical_id=(file_driver,
8115
                                          "%s/disk%d" % (file_storage_dir,
8116
                                                         disk_index)),
8117
                              mode=disk[constants.IDISK_MODE])
8118
      disks.append(disk_dev)
8119
  elif template_name == constants.DT_SHARED_FILE:
8120
    if len(secondary_nodes) != 0:
8121
      raise errors.ProgrammerError("Wrong template configuration")
8122

    
8123
    opcodes.RequireSharedFileStorage()
8124

    
8125
    for idx, disk in enumerate(disk_info):
8126
      disk_index = idx + base_index
8127
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8128
                              size=disk[constants.IDISK_SIZE],
8129
                              iv_name="disk/%d" % disk_index,
8130
                              logical_id=(file_driver,
8131
                                          "%s/disk%d" % (file_storage_dir,
8132
                                                         disk_index)),
8133
                              mode=disk[constants.IDISK_MODE])
8134
      disks.append(disk_dev)
8135
  elif template_name == constants.DT_BLOCK:
8136
    if len(secondary_nodes) != 0:
8137
      raise errors.ProgrammerError("Wrong template configuration")
8138

    
8139
    for idx, disk in enumerate(disk_info):
8140
      disk_index = idx + base_index
8141
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8142
                              size=disk[constants.IDISK_SIZE],
8143
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8144
                                          disk[constants.IDISK_ADOPT]),
8145
                              iv_name="disk/%d" % disk_index,
8146
                              mode=disk[constants.IDISK_MODE])
8147
      disks.append(disk_dev)
8148

    
8149
  else:
8150
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8151
  return disks
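
# Note: of the templates handled above, only DT_DRBD8 expects a secondary
# node (exactly one); DT_PLAIN, DT_FILE, DT_SHARED_FILE and DT_BLOCK require
# that no secondary nodes are given, and DT_DISKLESS creates no disks at all.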
8152

    
8153

    
8154
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
8159

    
8160

    
8161
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
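
# Worked example for _CalcEta: if 10240 MiB out of 40960 MiB were written in
# 300 seconds, avg_time is 300 / 10240.0 seconds per MiB and the estimate is
# (40960 - 10240) * (300 / 10240.0) == 900.0 seconds remaining, assuming the
# average write rate stays constant.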
8172

    
8173

    
8174
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node
8185

    
8186
  for device in instance.disks:
8187
    lu.cfg.SetDiskID(device, node)
8188

    
8189
  logging.info("Pause sync of instance %s disks", instance.name)
8190
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8191

    
8192
  for idx, success in enumerate(result.payload):
8193
    if not success:
8194
      logging.warn("pause-sync of instance %s for disks %d failed",
8195
                   instance.name, idx)
8196

    
8197
  try:
8198
    for idx, device in enumerate(instance.disks):
8199
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8200
      # MAX_WIPE_CHUNK at max
8201
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8202
                            constants.MIN_WIPE_CHUNK_PERCENT)
8203
      # we _must_ make this an int, otherwise rounding errors will
8204
      # occur
8205
      wipe_chunk_size = int(wipe_chunk_size)
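      # Illustrative example (the constants' real values may differ): with
      # MAX_WIPE_CHUNK == 1024 MiB and MIN_WIPE_CHUNK_PERCENT == 10, a
      # 20480 MiB disk would be wiped in min(1024, 20480 / 100.0 * 10) ==
      # 1024 MiB chunks, while a 5120 MiB disk would use 512 MiB chunks.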
8206

    
8207
      lu.LogInfo("* Wiping disk %d", idx)
8208
      logging.info("Wiping disk %d for instance %s, node %s using"
8209
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8210

    
8211
      offset = 0
8212
      size = device.size
8213
      last_output = 0
8214
      start_time = time.time()
8215

    
8216
      while offset < size:
8217
        wipe_size = min(wipe_chunk_size, size - offset)
8218
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8219
                      idx, offset, wipe_size)
8220
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8221
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8222
                     (idx, offset, wipe_size))
8223
        now = time.time()
8224
        offset += wipe_size
8225
        if now - last_output >= 60:
8226
          eta = _CalcEta(now - start_time, offset, size)
8227
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8228
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8229
          last_output = now
8230
  finally:
8231
    logging.info("Resume sync of instance %s disks", instance.name)
8232

    
8233
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8234

    
8235
    for idx, success in enumerate(result.payload):
8236
      if not success:
8237
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8238
                      " look at the status and troubleshoot the issue", idx)
8239
        logging.warn("resume-sync of instance %s for disks %d failed",
8240
                     instance.name, idx)
8241

    
8242

    
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)

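# Usage sketch for _CreateDisks (illustrative; "self" and "iobj" stand for an
# LU and an objects.Instance, as in LUInstanceCreate.Exec further down):
#   _CreateDisks(self, iobj)                # create every disk on all nodes
#   _CreateDisks(self, iobj, to_skip=[0])   # leave disk index 0 untouched
#   _CreateDisks(self, iobj, target_node="node3.example.com")
# The last form places all volumes on the given node only, which is how a
# move to a specific node can reuse this helper.
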
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result

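# Note on the DRBD branch in _RemoveDisks above: for DRBD8-based disks the
# logical_id is assumed here (for illustration) to be a tuple of the form
#   (primary_node, secondary_node, tcp_port, minor_a, minor_b, secret)
# so that logical_id[2] is the TCP/UDP port handed back to the cluster-wide
# pool via AddTcpUdpPort once the volumes are gone.
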
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + \
        payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]

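# Worked example for _ComputeDiskSizePerVG (illustrative values):
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}]
#   _ComputeDiskSizePerVG(constants.DT_PLAIN, disks) == {"xenvg": 3072}
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) == {"xenvg": 3072 + 2 * 128}
# i.e. the DRBD template adds DRBD_META_SIZE (128 MB) per disk to the total
# required in that disk's volume group.
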
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]

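# Worked example for _ComputeDiskSize (illustrative values): for two disks of
# 1024 MB and 2048 MB,
#   _ComputeDiskSize(constants.DT_PLAIN, disks) == 3072
#   _ComputeDiskSize(constants.DT_DRBD8, disks) == 3072 + 2 * DRBD_META_SIZE
# while DT_DISKLESS/DT_FILE yield None and DT_SHARED_FILE/DT_BLOCK yield 0,
# since those templates consume no space in the node's volume group.
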
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]

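# Example for _FilterVmNodes (hypothetical node names): if "node2" is marked
# non-vm_capable in the cluster configuration, then
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node3"]
# so the hypervisor/OS checks below only run on nodes that can host instances.
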
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)

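# _CheckHVParams is invoked with the node list and the hypervisor/parameters
# chosen for the operation, e.g. (as done in LUInstanceCreate.CheckPrereq
# further down):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# Offline nodes are skipped; any other validation failure is surfaced through
# info.Raise().
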
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)

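# Typical call, mirroring LUInstanceCreate.CheckPrereq further down:
#   _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
# The `required` flag is forwarded to the node-side OS validation, so whether
# a missing OS is fatal is decided there; an empty payload merely logs that
# validation was skipped for that node.
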
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks' parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

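# Illustration of the adopt/no-adopt consistency rule enforced in
# CheckArguments above (hypothetical disk specifications):
#   disks = [{constants.IDISK_ADOPT: "existing-lv"},
#            {constants.IDISK_SIZE: 1024}]
# would be rejected with "Either all disks are adopted or none is", whereas
# specifying IDISK_ADOPT for every disk (or for none of them) passes this
# check.
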
  def ExpandNames(self):
8636
    """ExpandNames for CreateInstance.
8637

8638
    Figure out the right locks for instance creation.
8639

8640
    """
8641
    self.needed_locks = {}
8642

    
8643
    instance_name = self.op.instance_name
8644
    # this is just a preventive check, but someone might still add this
8645
    # instance in the meantime, and creation will fail at lock-add time
8646
    if instance_name in self.cfg.GetInstanceList():
8647
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8648
                                 instance_name, errors.ECODE_EXISTS)
8649

    
8650
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8651

    
8652
    if self.op.iallocator:
8653
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8654
      # specifying a group on instance creation and then selecting nodes from
8655
      # that group
8656
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8657
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8658
    else:
8659
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8660
      nodelist = [self.op.pnode]
8661
      if self.op.snode is not None:
8662
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8663
        nodelist.append(self.op.snode)
8664
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8665
      # Lock resources of instance's primary and secondary nodes (copy to
8666
      # prevent accidential modification)
8667
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8668

    
8669
    # in case of import lock the source node too
8670
    if self.op.mode == constants.INSTANCE_IMPORT:
8671
      src_node = self.op.src_node
8672
      src_path = self.op.src_path
8673

    
8674
      if src_path is None:
8675
        self.op.src_path = src_path = self.op.instance_name
8676

    
8677
      if src_node is None:
8678
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8679
        self.op.src_node = None
8680
        if os.path.isabs(src_path):
8681
          raise errors.OpPrereqError("Importing an instance from a path"
8682
                                     " requires a source node option",
8683
                                     errors.ECODE_INVAL)
8684
      else:
8685
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8686
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8687
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8688
        if not os.path.isabs(src_path):
8689
          self.op.src_path = src_path = \
8690
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8691

    
8692
  def _RunAllocator(self):
8693
    """Run the allocator based on input opcode.
8694

8695
    """
8696
    nics = [n.ToDict() for n in self.nics]
8697
    ial = IAllocator(self.cfg, self.rpc,
8698
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8699
                     name=self.op.instance_name,
8700
                     disk_template=self.op.disk_template,
8701
                     tags=self.op.tags,
8702
                     os=self.op.os_type,
8703
                     vcpus=self.be_full[constants.BE_VCPUS],
8704
                     memory=self.be_full[constants.BE_MAXMEM],
8705
                     disks=self.disks,
8706
                     nics=nics,
8707
                     hypervisor=self.op.hypervisor,
8708
                     )
8709

    
8710
    ial.Run(self.op.iallocator)
8711

    
8712
    if not ial.success:
8713
      raise errors.OpPrereqError("Can't compute nodes using"
8714
                                 " iallocator '%s': %s" %
8715
                                 (self.op.iallocator, ial.info),
8716
                                 errors.ECODE_NORES)
8717
    if len(ial.result) != ial.required_nodes:
8718
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8719
                                 " of nodes (%s), required %s" %
8720
                                 (self.op.iallocator, len(ial.result),
8721
                                  ial.required_nodes), errors.ECODE_FAULT)
8722
    self.op.pnode = ial.result[0]
8723
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8724
                 self.op.instance_name, self.op.iallocator,
8725
                 utils.CommaJoin(ial.result))
8726
    if ial.required_nodes == 2:
8727
      self.op.snode = ial.result[1]
8728

    
8729
  def BuildHooksEnv(self):
8730
    """Build hooks env.
8731

8732
    This runs on master, primary and secondary nodes of the instance.
8733

8734
    """
8735
    env = {
8736
      "ADD_MODE": self.op.mode,
8737
      }
8738
    if self.op.mode == constants.INSTANCE_IMPORT:
8739
      env["SRC_NODE"] = self.op.src_node
8740
      env["SRC_PATH"] = self.op.src_path
8741
      env["SRC_IMAGES"] = self.src_images
8742

    
8743
    env.update(_BuildInstanceHookEnv(
8744
      name=self.op.instance_name,
8745
      primary_node=self.op.pnode,
8746
      secondary_nodes=self.secondaries,
8747
      status=self.op.start,
8748
      os_type=self.op.os_type,
8749
      minmem=self.be_full[constants.BE_MINMEM],
8750
      maxmem=self.be_full[constants.BE_MAXMEM],
8751
      vcpus=self.be_full[constants.BE_VCPUS],
8752
      nics=_NICListToTuple(self, self.nics),
8753
      disk_template=self.op.disk_template,
8754
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8755
             for d in self.disks],
8756
      bep=self.be_full,
8757
      hvp=self.hv_full,
8758
      hypervisor_name=self.op.hypervisor,
8759
      tags=self.op.tags,
8760
    ))
8761

    
8762
    return env
8763

    
8764
  def BuildHooksNodes(self):
8765
    """Build hooks nodes.
8766

8767
    """
8768
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8769
    return nl, nl
8770

    
8771
  def _ReadExportInfo(self):
8772
    """Reads the export information from disk.
8773

8774
    It will override the opcode source node and path with the actual
8775
    information, if these two were not specified before.
8776

8777
    @return: the export information
8778

8779
    """
8780
    assert self.op.mode == constants.INSTANCE_IMPORT
8781

    
8782
    src_node = self.op.src_node
8783
    src_path = self.op.src_path
8784

    
8785
    if src_node is None:
8786
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8787
      exp_list = self.rpc.call_export_list(locked_nodes)
8788
      found = False
8789
      for node in exp_list:
8790
        if exp_list[node].fail_msg:
8791
          continue
8792
        if src_path in exp_list[node].payload:
8793
          found = True
8794
          self.op.src_node = src_node = node
8795
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8796
                                                       src_path)
8797
          break
8798
      if not found:
8799
        raise errors.OpPrereqError("No export found for relative path %s" %
8800
                                    src_path, errors.ECODE_INVAL)
8801

    
8802
    _CheckNodeOnline(self, src_node)
8803
    result = self.rpc.call_export_info(src_node, src_path)
8804
    result.Raise("No export or invalid export found in dir %s" % src_path)
8805

    
8806
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8807
    if not export_info.has_section(constants.INISECT_EXP):
8808
      raise errors.ProgrammerError("Corrupted export config",
8809
                                   errors.ECODE_ENVIRON)
8810

    
8811
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8812
    if (int(ei_version) != constants.EXPORT_VERSION):
8813
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8814
                                 (ei_version, constants.EXPORT_VERSION),
8815
                                 errors.ECODE_ENVIRON)
8816
    return export_info
8817

    
8818
  def _ReadExportParams(self, einfo):
8819
    """Use export parameters as defaults.
8820

8821
    In case the opcode doesn't specify (as in override) some instance
8822
    parameters, then try to use them from the export information, if
8823
    that declares them.
8824

8825
    """
8826
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8827

    
8828
    if self.op.disk_template is None:
8829
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8830
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8831
                                          "disk_template")
8832
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8833
          raise errors.OpPrereqError("Disk template specified in configuration"
8834
                                     " file is not one of the allowed values:"
8835
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8836
      else:
8837
        raise errors.OpPrereqError("No disk template specified and the export"
8838
                                   " is missing the disk_template information",
8839
                                   errors.ECODE_INVAL)
8840

    
8841
    if not self.op.disks:
8842
      disks = []
8843
      # TODO: import the disk iv_name too
8844
      for idx in range(constants.MAX_DISKS):
8845
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8846
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8847
          disks.append({constants.IDISK_SIZE: disk_sz})
8848
      self.op.disks = disks
8849
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8850
        raise errors.OpPrereqError("No disk info specified and the export"
8851
                                   " is missing the disk information",
8852
                                   errors.ECODE_INVAL)
8853

    
8854
    if not self.op.nics:
8855
      nics = []
8856
      for idx in range(constants.MAX_NICS):
8857
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8858
          ndict = {}
8859
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8860
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8861
            ndict[name] = v
8862
          nics.append(ndict)
8863
        else:
8864
          break
8865
      self.op.nics = nics
8866

    
8867
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8868
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8869

    
8870
    if (self.op.hypervisor is None and
8871
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8872
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8873

    
8874
    if einfo.has_section(constants.INISECT_HYP):
8875
      # use the export parameters but do not override the ones
8876
      # specified by the user
8877
      for name, value in einfo.items(constants.INISECT_HYP):
8878
        if name not in self.op.hvparams:
8879
          self.op.hvparams[name] = value
8880

    
8881
    if einfo.has_section(constants.INISECT_BEP):
8882
      # use the parameters, without overriding
8883
      for name, value in einfo.items(constants.INISECT_BEP):
8884
        if name not in self.op.beparams:
8885
          self.op.beparams[name] = value
8886
        # Compatibility for the old "memory" be param
8887
        if name == constants.BE_MEMORY:
8888
          if constants.BE_MAXMEM not in self.op.beparams:
8889
            self.op.beparams[constants.BE_MAXMEM] = value
8890
          if constants.BE_MINMEM not in self.op.beparams:
8891
            self.op.beparams[constants.BE_MINMEM] = value
8892
    else:
8893
      # try to read the parameters old style, from the main section
8894
      for name in constants.BES_PARAMETERS:
8895
        if (name not in self.op.beparams and
8896
            einfo.has_option(constants.INISECT_INS, name)):
8897
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8898

    
8899
    if einfo.has_section(constants.INISECT_OSP):
8900
      # use the parameters, without overriding
8901
      for name, value in einfo.items(constants.INISECT_OSP):
8902
        if name not in self.op.osparams:
8903
          self.op.osparams[name] = value
8904

    
8905
  def _RevertToDefaults(self, cluster):
8906
    """Revert the instance parameters to the default values.
8907

8908
    """
8909
    # hvparams
8910
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8911
    for name in self.op.hvparams.keys():
8912
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8913
        del self.op.hvparams[name]
8914
    # beparams
8915
    be_defs = cluster.SimpleFillBE({})
8916
    for name in self.op.beparams.keys():
8917
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8918
        del self.op.beparams[name]
8919
    # nic params
8920
    nic_defs = cluster.SimpleFillNIC({})
8921
    for nic in self.op.nics:
8922
      for name in constants.NICS_PARAMETERS:
8923
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8924
          del nic[name]
8925
    # osparams
8926
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8927
    for name in self.op.osparams.keys():
8928
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8929
        del self.op.osparams[name]
8930

    
8931
  def _CalculateFileStorageDir(self):
8932
    """Calculate final instance file storage dir.
8933

8934
    """
8935
    # file storage dir calculation/check
8936
    self.instance_file_storage_dir = None
8937
    if self.op.disk_template in constants.DTS_FILEBASED:
8938
      # build the full file storage dir path
8939
      joinargs = []
8940

    
8941
      if self.op.disk_template == constants.DT_SHARED_FILE:
8942
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8943
      else:
8944
        get_fsd_fn = self.cfg.GetFileStorageDir
8945

    
8946
      cfg_storagedir = get_fsd_fn()
8947
      if not cfg_storagedir:
8948
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8949
      joinargs.append(cfg_storagedir)
8950

    
8951
      if self.op.file_storage_dir is not None:
8952
        joinargs.append(self.op.file_storage_dir)
8953

    
8954
      joinargs.append(self.op.instance_name)
8955

    
8956
      # pylint: disable=W0142
8957
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8958

    
8959
  def CheckPrereq(self):
8960
    """Check prerequisites.
8961

8962
    """
8963
    self._CalculateFileStorageDir()
8964

    
8965
    if self.op.mode == constants.INSTANCE_IMPORT:
8966
      export_info = self._ReadExportInfo()
8967
      self._ReadExportParams(export_info)
8968

    
8969
    if (not self.cfg.GetVGName() and
8970
        self.op.disk_template not in constants.DTS_NOT_LVM):
8971
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8972
                                 " instances", errors.ECODE_STATE)
8973

    
8974
    if (self.op.hypervisor is None or
8975
        self.op.hypervisor == constants.VALUE_AUTO):
8976
      self.op.hypervisor = self.cfg.GetHypervisorType()
8977

    
8978
    cluster = self.cfg.GetClusterInfo()
8979
    enabled_hvs = cluster.enabled_hypervisors
8980
    if self.op.hypervisor not in enabled_hvs:
8981
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8982
                                 " cluster (%s)" % (self.op.hypervisor,
8983
                                  ",".join(enabled_hvs)),
8984
                                 errors.ECODE_STATE)
8985

    
8986
    # Check tag validity
8987
    for tag in self.op.tags:
8988
      objects.TaggableObject.ValidateTag(tag)
8989

    
8990
    # check hypervisor parameter syntax (locally)
8991
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8992
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8993
                                      self.op.hvparams)
8994
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8995
    hv_type.CheckParameterSyntax(filled_hvp)
8996
    self.hv_full = filled_hvp
8997
    # check that we don't specify global parameters on an instance
8998
    _CheckGlobalHvParams(self.op.hvparams)
8999

    
9000
    # fill and remember the beparams dict
9001
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
9002
    for param, value in self.op.beparams.iteritems():
9003
      if value == constants.VALUE_AUTO:
9004
        self.op.beparams[param] = default_beparams[param]
9005
    objects.UpgradeBeParams(self.op.beparams)
9006
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9007
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
9008

    
9009
    # build os parameters
9010
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9011

    
9012
    # now that hvp/bep are in final format, let's reset to defaults,
9013
    # if told to do so
9014
    if self.op.identify_defaults:
9015
      self._RevertToDefaults(cluster)
9016

    
9017
    # NIC buildup
9018
    self.nics = []
9019
    for idx, nic in enumerate(self.op.nics):
9020
      nic_mode_req = nic.get(constants.INIC_MODE, None)
9021
      nic_mode = nic_mode_req
9022
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9023
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9024

    
9025
      # in routed mode, for the first nic, the default ip is 'auto'
9026
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9027
        default_ip_mode = constants.VALUE_AUTO
9028
      else:
9029
        default_ip_mode = constants.VALUE_NONE
9030

    
9031
      # ip validity checks
9032
      ip = nic.get(constants.INIC_IP, default_ip_mode)
9033
      if ip is None or ip.lower() == constants.VALUE_NONE:
9034
        nic_ip = None
9035
      elif ip.lower() == constants.VALUE_AUTO:
9036
        if not self.op.name_check:
9037
          raise errors.OpPrereqError("IP address set to auto but name checks"
9038
                                     " have been skipped",
9039
                                     errors.ECODE_INVAL)
9040
        nic_ip = self.hostname1.ip
9041
      else:
9042
        if not netutils.IPAddress.IsValid(ip):
9043
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9044
                                     errors.ECODE_INVAL)
9045
        nic_ip = ip
9046

    
9047
      # TODO: check the ip address for uniqueness
9048
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9049
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
9050
                                   errors.ECODE_INVAL)
9051

    
9052
      # MAC address verification
9053
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9054
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9055
        mac = utils.NormalizeAndValidateMac(mac)
9056

    
9057
        try:
9058
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9059
        except errors.ReservationError:
9060
          raise errors.OpPrereqError("MAC address %s already in use"
9061
                                     " in cluster" % mac,
9062
                                     errors.ECODE_NOTUNIQUE)
9063

    
9064
      #  Build nic parameters
9065
      link = nic.get(constants.INIC_LINK, None)
9066
      if link == constants.VALUE_AUTO:
9067
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9068
      nicparams = {}
9069
      if nic_mode_req:
9070
        nicparams[constants.NIC_MODE] = nic_mode
9071
      if link:
9072
        nicparams[constants.NIC_LINK] = link
9073

    
9074
      check_params = cluster.SimpleFillNIC(nicparams)
9075
      objects.NIC.CheckParameterSyntax(check_params)
9076
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9077

    
9078
    # disk checks/pre-build
9079
    default_vg = self.cfg.GetVGName()
9080
    self.disks = []
9081
    for disk in self.op.disks:
9082
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9083
      if mode not in constants.DISK_ACCESS_SET:
9084
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9085
                                   mode, errors.ECODE_INVAL)
9086
      size = disk.get(constants.IDISK_SIZE, None)
9087
      if size is None:
9088
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9089
      try:
9090
        size = int(size)
9091
      except (TypeError, ValueError):
9092
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9093
                                   errors.ECODE_INVAL)
9094

    
9095
      data_vg = disk.get(constants.IDISK_VG, default_vg)
9096
      new_disk = {
9097
        constants.IDISK_SIZE: size,
9098
        constants.IDISK_MODE: mode,
9099
        constants.IDISK_VG: data_vg,
9100
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9101
        }
9102
      if constants.IDISK_ADOPT in disk:
9103
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9104
      self.disks.append(new_disk)
9105

    
9106
    if self.op.mode == constants.INSTANCE_IMPORT:
9107
      disk_images = []
9108
      for idx in range(len(self.disks)):
9109
        option = "disk%d_dump" % idx
9110
        if export_info.has_option(constants.INISECT_INS, option):
9111
          # FIXME: are the old os-es, disk sizes, etc. useful?
9112
          export_name = export_info.get(constants.INISECT_INS, option)
9113
          image = utils.PathJoin(self.op.src_path, export_name)
9114
          disk_images.append(image)
9115
        else:
9116
          disk_images.append(False)
9117

    
9118
      self.src_images = disk_images
9119

    
9120
      old_name = export_info.get(constants.INISECT_INS, "name")
9121
      if self.op.instance_name == old_name:
9122
        for idx, nic in enumerate(self.nics):
9123
          if nic.mac == constants.VALUE_AUTO:
9124
            nic_mac_ini = "nic%d_mac" % idx
9125
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9126

    
9127
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9128

    
9129
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
9130
    if self.op.ip_check:
9131
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9132
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
9133
                                   (self.check_ip, self.op.instance_name),
9134
                                   errors.ECODE_NOTUNIQUE)
9135

    
9136
    #### mac address generation
9137
    # By generating here the mac address both the allocator and the hooks get
9138
    # the real final mac address rather than the 'auto' or 'generate' value.
9139
    # There is a race condition between the generation and the instance object
9140
    # creation, which means that we know the mac is valid now, but we're not
9141
    # sure it will be when we actually add the instance. If things go bad
9142
    # adding the instance will abort because of a duplicate mac, and the
9143
    # creation job will fail.
9144
    for nic in self.nics:
9145
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9146
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9147

    
9148
    #### allocator run
9149

    
9150
    if self.op.iallocator is not None:
9151
      self._RunAllocator()
9152

    
9153
    # Release all unneeded node locks
9154
    _ReleaseLocks(self, locking.LEVEL_NODE,
9155
                  keep=filter(None, [self.op.pnode, self.op.snode,
9156
                                     self.op.src_node]))
9157

    
9158
    #### node related checks
9159

    
9160
    # check primary node
9161
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9162
    assert self.pnode is not None, \
9163
      "Cannot retrieve locked node %s" % self.op.pnode
9164
    if pnode.offline:
9165
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9166
                                 pnode.name, errors.ECODE_STATE)
9167
    if pnode.drained:
9168
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9169
                                 pnode.name, errors.ECODE_STATE)
9170
    if not pnode.vm_capable:
9171
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9172
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9173

    
9174
    self.secondaries = []
9175

    
9176
    # mirror node verification
9177
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9178
      if self.op.snode == pnode.name:
9179
        raise errors.OpPrereqError("The secondary node cannot be the"
9180
                                   " primary node", errors.ECODE_INVAL)
9181
      _CheckNodeOnline(self, self.op.snode)
9182
      _CheckNodeNotDrained(self, self.op.snode)
9183
      _CheckNodeVmCapable(self, self.op.snode)
9184
      self.secondaries.append(self.op.snode)
9185

    
9186
    nodenames = [pnode.name] + self.secondaries
9187

    
9188
    if not self.adopt_disks:
9189
      # Check lv size requirements, if not adopting
9190
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9191
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9192

    
9193
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9194
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9195
                                disk[constants.IDISK_ADOPT])
9196
                     for disk in self.disks])
9197
      if len(all_lvs) != len(self.disks):
9198
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9199
                                   errors.ECODE_INVAL)
9200
      for lv_name in all_lvs:
9201
        try:
9202
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9203
          # to ReserveLV uses the same syntax
9204
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9205
        except errors.ReservationError:
9206
          raise errors.OpPrereqError("LV named %s used by another instance" %
9207
                                     lv_name, errors.ECODE_NOTUNIQUE)
9208

    
9209
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9210
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9211

    
9212
      node_lvs = self.rpc.call_lv_list([pnode.name],
9213
                                       vg_names.payload.keys())[pnode.name]
9214
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9215
      node_lvs = node_lvs.payload
9216

    
9217
      delta = all_lvs.difference(node_lvs.keys())
9218
      if delta:
9219
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9220
                                   utils.CommaJoin(delta),
9221
                                   errors.ECODE_INVAL)
9222
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9223
      if online_lvs:
9224
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9225
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9226
                                   errors.ECODE_STATE)
9227
      # update the size of disk based on what is found
9228
      for dsk in self.disks:
9229
        dsk[constants.IDISK_SIZE] = \
9230
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9231
                                        dsk[constants.IDISK_ADOPT])][0]))
9232

    
9233
    elif self.op.disk_template == constants.DT_BLOCK:
9234
      # Normalize and de-duplicate device paths
9235
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9236
                       for disk in self.disks])
9237
      if len(all_disks) != len(self.disks):
9238
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9239
                                   errors.ECODE_INVAL)
9240
      baddisks = [d for d in all_disks
9241
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9242
      if baddisks:
9243
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9244
                                   " cannot be adopted" %
9245
                                   (", ".join(baddisks),
9246
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9247
                                   errors.ECODE_INVAL)
9248

    
9249
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9250
                                            list(all_disks))[pnode.name]
9251
      node_disks.Raise("Cannot get block device information from node %s" %
9252
                       pnode.name)
9253
      node_disks = node_disks.payload
9254
      delta = all_disks.difference(node_disks.keys())
9255
      if delta:
9256
        raise errors.OpPrereqError("Missing block device(s): %s" %
9257
                                   utils.CommaJoin(delta),
9258
                                   errors.ECODE_INVAL)
9259
      for dsk in self.disks:
9260
        dsk[constants.IDISK_SIZE] = \
9261
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9262

    
9263
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9264

    
9265
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9266
    # check OS parameters (remotely)
9267
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9268

    
9269
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9270

    
9271
    # memory check on primary node
9272
    #TODO(dynmem): use MINMEM for checking
9273
    if self.op.start:
9274
      _CheckNodeFreeMemory(self, self.pnode.name,
9275
                           "creating instance %s" % self.op.instance_name,
9276
                           self.be_full[constants.BE_MAXMEM],
9277
                           self.op.hypervisor)
9278

    
9279
    self.dry_run_result = list(nodenames)
9280

    
9281
  def Exec(self, feedback_fn):
9282
    """Create and add the instance to the cluster.
9283

9284
    """
9285
    instance = self.op.instance_name
9286
    pnode_name = self.pnode.name
9287

    
9288
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9289
                self.owned_locks(locking.LEVEL_NODE)), \
9290
      "Node locks differ from node resource locks"
9291

    
9292
    ht_kind = self.op.hypervisor
9293
    if ht_kind in constants.HTS_REQ_PORT:
9294
      network_port = self.cfg.AllocatePort()
9295
    else:
9296
      network_port = None
9297

    
9298
    disks = _GenerateDiskTemplate(self,
9299
                                  self.op.disk_template,
9300
                                  instance, pnode_name,
9301
                                  self.secondaries,
9302
                                  self.disks,
9303
                                  self.instance_file_storage_dir,
9304
                                  self.op.file_driver,
9305
                                  0,
9306
                                  feedback_fn)
9307

    
9308
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9309
                            primary_node=pnode_name,
9310
                            nics=self.nics, disks=disks,
9311
                            disk_template=self.op.disk_template,
9312
                            admin_state=constants.ADMINST_DOWN,
9313
                            network_port=network_port,
9314
                            beparams=self.op.beparams,
9315
                            hvparams=self.op.hvparams,
9316
                            hypervisor=self.op.hypervisor,
9317
                            osparams=self.op.osparams,
9318
                            )
9319

    
9320
    if self.op.tags:
9321
      for tag in self.op.tags:
9322
        iobj.AddTag(tag)
9323

    
9324
    if self.adopt_disks:
9325
      if self.op.disk_template == constants.DT_PLAIN:
9326
        # rename LVs to the newly-generated names; we need to construct
9327
        # 'fake' LV disks with the old data, plus the new unique_id
9328
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9329
        rename_to = []
9330
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9331
          rename_to.append(t_dsk.logical_id)
9332
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9333
          self.cfg.SetDiskID(t_dsk, pnode_name)
9334
        result = self.rpc.call_blockdev_rename(pnode_name,
9335
                                               zip(tmp_disks, rename_to))
9336
        result.Raise("Failed to rename adoped LVs")
9337
    else:
9338
      feedback_fn("* creating instance disks...")
9339
      try:
9340
        _CreateDisks(self, iobj)
9341
      except errors.OpExecError:
9342
        self.LogWarning("Device creation failed, reverting...")
9343
        try:
9344
          _RemoveDisks(self, iobj)
9345
        finally:
9346
          self.cfg.ReleaseDRBDMinors(instance)
9347
          raise
9348

    
9349
    feedback_fn("adding instance %s to cluster config" % instance)
9350

    
9351
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9352

    
9353
    # Declare that we don't want to remove the instance lock anymore, as we've
9354
    # added the instance to the config
9355
    del self.remove_locks[locking.LEVEL_INSTANCE]
9356

    
9357
    if self.op.mode == constants.INSTANCE_IMPORT:
9358
      # Release unused nodes
9359
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9360
    else:
9361
      # Release all nodes
9362
      _ReleaseLocks(self, locking.LEVEL_NODE)
9363

    
9364
    disk_abort = False
9365
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9366
      feedback_fn("* wiping instance disks...")
9367
      try:
9368
        _WipeDisks(self, iobj)
9369
      except errors.OpExecError, err:
9370
        logging.exception("Wiping disks failed")
9371
        self.LogWarning("Wiping instance disks failed (%s)", err)
9372
        disk_abort = True
9373

    
9374
    if disk_abort:
9375
      # Something is already wrong with the disks, don't do anything else
9376
      pass
9377
    elif self.op.wait_for_sync:
9378
      disk_abort = not _WaitForSync(self, iobj)
9379
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9380
      # make sure the disks are not degraded (still sync-ing is ok)
9381
      feedback_fn("* checking mirrors status")
9382
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9383
    else:
9384
      disk_abort = False
9385

    
9386
    if disk_abort:
9387
      _RemoveDisks(self, iobj)
9388
      self.cfg.RemoveInstance(iobj.name)
9389
      # Make sure the instance lock gets removed
9390
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9391
      raise errors.OpExecError("There are some degraded disks for"
9392
                               " this instance")
9393

    
9394
    # Release all node resource locks
9395
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9396

    
9397
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9398
      if self.op.mode == constants.INSTANCE_CREATE:
9399
        if not self.op.no_install:
9400
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9401
                        not self.op.wait_for_sync)
9402
          if pause_sync:
9403
            feedback_fn("* pausing disk sync to install instance OS")
9404
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9405
                                                              iobj.disks, True)
9406
            for idx, success in enumerate(result.payload):
9407
              if not success:
9408
                logging.warn("pause-sync of instance %s for disk %d failed",
9409
                             instance, idx)
9410

    
9411
          feedback_fn("* running the instance OS create scripts...")
9412
          # FIXME: pass debug option from opcode to backend
9413
          os_add_result = \
9414
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9415
                                          self.op.debug_level)
9416
          if pause_sync:
9417
            feedback_fn("* resuming disk sync")
9418
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9419
                                                              iobj.disks, False)
9420
            for idx, success in enumerate(result.payload):
9421
              if not success:
9422
                logging.warn("resume-sync of instance %s for disk %d failed",
9423
                             instance, idx)
9424

    
9425
          os_add_result.Raise("Could not add os for instance %s"
9426
                              " on node %s" % (instance, pnode_name))
9427

    
9428
      elif self.op.mode == constants.INSTANCE_IMPORT:
9429
        feedback_fn("* running the instance OS import scripts...")
9430

    
9431
        transfers = []
9432

    
9433
        for idx, image in enumerate(self.src_images):
9434
          if not image:
9435
            continue
9436

    
9437
          # FIXME: pass debug option from opcode to backend
9438
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9439
                                             constants.IEIO_FILE, (image, ),
9440
                                             constants.IEIO_SCRIPT,
9441
                                             (iobj.disks[idx], idx),
9442
                                             None)
9443
          transfers.append(dt)
9444

    
9445
        import_result = \
9446
          masterd.instance.TransferInstanceData(self, feedback_fn,
9447
                                                self.op.src_node, pnode_name,
9448
                                                self.pnode.secondary_ip,
9449
                                                iobj, transfers)
9450
        if not compat.all(import_result):
9451
          self.LogWarning("Some disks for instance %s on node %s were not"
9452
                          " imported successfully" % (instance, pnode_name))
9453

    
9454
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9455
        feedback_fn("* preparing remote import...")
9456
        # The source cluster will stop the instance before attempting to make a
9457
        # connection. In some cases stopping an instance can take a long time,
9458
        # hence the shutdown timeout is added to the connection timeout.
9459
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9460
                           self.op.source_shutdown_timeout)
9461
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9462

    
9463
        assert iobj.primary_node == self.pnode.name
9464
        disk_results = \
9465
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9466
                                        self.source_x509_ca,
9467
                                        self._cds, timeouts)
9468
        if not compat.all(disk_results):
9469
          # TODO: Should the instance still be started, even if some disks
9470
          # failed to import (valid for local imports, too)?
9471
          self.LogWarning("Some disks for instance %s on node %s were not"
9472
                          " imported successfully" % (instance, pnode_name))
9473

    
9474
        # Run rename script on newly imported instance
9475
        assert iobj.name == instance
9476
        feedback_fn("Running rename script for %s" % instance)
9477
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9478
                                                   self.source_instance_name,
9479
                                                   self.op.debug_level)
9480
        if result.fail_msg:
9481
          self.LogWarning("Failed to run rename script for %s on node"
9482
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9483

    
9484
      else:
9485
        # also checked in the prereq part
9486
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9487
                                     % self.op.mode)
9488

    
9489
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9490

    
9491
    if self.op.start:
9492
      iobj.admin_state = constants.ADMINST_UP
9493
      self.cfg.Update(iobj, feedback_fn)
9494
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9495
      feedback_fn("* starting instance...")
9496
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9497
                                            False)
9498
      result.Raise("Could not start instance")
9499

    
9500
    return list(iobj.all_nodes)
9501

    
9502

    
9503
class LUInstanceConsole(NoHooksLU):
9504
  """Connect to an instance's console.
9505

9506
  This is somewhat special in that it returns the command line that
9507
  you need to run on the master node in order to connect to the
9508
  console.
9509

9510
  """
9511
  REQ_BGL = False
9512

    
9513
  def ExpandNames(self):
9514
    self.share_locks = _ShareAll()
9515
    self._ExpandAndLockInstance()
9516

    
9517
  def CheckPrereq(self):
9518
    """Check prerequisites.
9519

9520
    This checks that the instance is in the cluster.
9521

9522
    """
9523
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9524
    assert self.instance is not None, \
9525
      "Cannot retrieve locked instance %s" % self.op.instance_name
9526
    _CheckNodeOnline(self, self.instance.primary_node)
9527

    
9528
  def Exec(self, feedback_fn):
9529
    """Connect to the console of an instance
9530

9531
    """
9532
    instance = self.instance
9533
    node = instance.primary_node
9534

    
9535
    node_insts = self.rpc.call_instance_list([node],
9536
                                             [instance.hypervisor])[node]
9537
    node_insts.Raise("Can't get node information from %s" % node)
9538

    
9539
    if instance.name not in node_insts.payload:
9540
      if instance.admin_state == constants.ADMINST_UP:
9541
        state = constants.INSTST_ERRORDOWN
9542
      elif instance.admin_state == constants.ADMINST_DOWN:
9543
        state = constants.INSTST_ADMINDOWN
9544
      else:
9545
        state = constants.INSTST_ADMINOFFLINE
9546
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9547
                               (instance.name, state))
9548

    
9549
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9550

    
9551
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9552

    
9553

    
9554
def _GetInstanceConsole(cluster, instance):
9555
  """Returns console information for an instance.
9556

9557
  @type cluster: L{objects.Cluster}
9558
  @type instance: L{objects.Instance}
9559
  @rtype: dict
9560

9561
  """
9562
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9563
  # beparams and hvparams are passed separately, to avoid editing the
9564
  # instance and then saving the defaults in the instance itself.
9565
  hvparams = cluster.FillHV(instance)
9566
  beparams = cluster.FillBE(instance)
9567
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9568

    
9569
  assert console.instance == instance.name
9570
  assert console.Validate()
9571

    
9572
  return console.ToDict()
9573
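# Illustrative sketch only, not part of the upstream module: how a caller
# holding the cluster config and an instance object might consume the plain
# dict produced by _GetInstanceConsole() above.  The helper name is made up
# for the example and nothing is assumed about specific keys beyond the dict
# coming from console.ToDict().
def _ExampleDescribeConsole(cfg, instance):
  """Example only: render an instance's console information as text."""
  console = _GetInstanceConsole(cfg.GetClusterInfo(), instance)
  # console is a plain dict, so it can be serialized or pretty-printed as-is
  return ", ".join("%s=%s" % (key, value)
                   for (key, value) in sorted(console.items()))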

    
9574

    
9575
class LUInstanceReplaceDisks(LogicalUnit):
9576
  """Replace the disks of an instance.
9577

9578
  """
9579
  HPATH = "mirrors-replace"
9580
  HTYPE = constants.HTYPE_INSTANCE
9581
  REQ_BGL = False
9582

    
9583
  def CheckArguments(self):
9584
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9585
                                  self.op.iallocator)
9586

    
9587
  def ExpandNames(self):
9588
    self._ExpandAndLockInstance()
9589

    
9590
    assert locking.LEVEL_NODE not in self.needed_locks
9591
    assert locking.LEVEL_NODE_RES not in self.needed_locks
9592
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9593

    
9594
    assert self.op.iallocator is None or self.op.remote_node is None, \
9595
      "Conflicting options"
9596

    
9597
    if self.op.remote_node is not None:
9598
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9599

    
9600
      # Warning: do not remove the locking of the new secondary here
9601
      # unless DRBD8.AddChildren is changed to work in parallel;
9602
      # currently it doesn't since parallel invocations of
9603
      # FindUnusedMinor will conflict
9604
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9605
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9606
    else:
9607
      self.needed_locks[locking.LEVEL_NODE] = []
9608
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9609

    
9610
      if self.op.iallocator is not None:
9611
        # iallocator will select a new node in the same group
9612
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9613

    
9614
    self.needed_locks[locking.LEVEL_NODE_RES] = []
9615

    
9616
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9617
                                   self.op.iallocator, self.op.remote_node,
9618
                                   self.op.disks, False, self.op.early_release)
9619

    
9620
    self.tasklets = [self.replacer]
9621

    
9622
  def DeclareLocks(self, level):
9623
    if level == locking.LEVEL_NODEGROUP:
9624
      assert self.op.remote_node is None
9625
      assert self.op.iallocator is not None
9626
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9627

    
9628
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9629
      # Lock all groups used by instance optimistically; this requires going
9630
      # via the node before it's locked, requiring verification later on
9631
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9632
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9633

    
9634
    elif level == locking.LEVEL_NODE:
9635
      if self.op.iallocator is not None:
9636
        assert self.op.remote_node is None
9637
        assert not self.needed_locks[locking.LEVEL_NODE]
9638

    
9639
        # Lock member nodes of all locked groups
9640
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9641
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9642
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9643
      else:
9644
        self._LockInstancesNodes()
9645
    elif level == locking.LEVEL_NODE_RES:
9646
      # Reuse node locks
9647
      self.needed_locks[locking.LEVEL_NODE_RES] = \
9648
        self.needed_locks[locking.LEVEL_NODE]
9649

    
9650
  def BuildHooksEnv(self):
9651
    """Build hooks env.
9652

9653
    This runs on the master, the primary and all the secondaries.
9654

9655
    """
9656
    instance = self.replacer.instance
9657
    env = {
9658
      "MODE": self.op.mode,
9659
      "NEW_SECONDARY": self.op.remote_node,
9660
      "OLD_SECONDARY": instance.secondary_nodes[0],
9661
      }
9662
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9663
    return env
9664

    
9665
  def BuildHooksNodes(self):
9666
    """Build hooks nodes.
9667

9668
    """
9669
    instance = self.replacer.instance
9670
    nl = [
9671
      self.cfg.GetMasterNode(),
9672
      instance.primary_node,
9673
      ]
9674
    if self.op.remote_node is not None:
9675
      nl.append(self.op.remote_node)
9676
    return nl, nl
9677

    
9678
  def CheckPrereq(self):
9679
    """Check prerequisites.
9680

9681
    """
9682
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9683
            self.op.iallocator is None)
9684

    
9685
    # Verify if node group locks are still correct
9686
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9687
    if owned_groups:
9688
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9689

    
9690
    return LogicalUnit.CheckPrereq(self)
9691

    
9692

    
9693
class TLReplaceDisks(Tasklet):
9694
  """Replaces disks for an instance.
9695

9696
  Note: Locking is not within the scope of this class.
9697

9698
  """
9699
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9700
               disks, delay_iallocator, early_release):
9701
    """Initializes this class.
9702

9703
    """
9704
    Tasklet.__init__(self, lu)
9705

    
9706
    # Parameters
9707
    self.instance_name = instance_name
9708
    self.mode = mode
9709
    self.iallocator_name = iallocator_name
9710
    self.remote_node = remote_node
9711
    self.disks = disks
9712
    self.delay_iallocator = delay_iallocator
9713
    self.early_release = early_release
9714

    
9715
    # Runtime data
9716
    self.instance = None
9717
    self.new_node = None
9718
    self.target_node = None
9719
    self.other_node = None
9720
    self.remote_node_info = None
9721
    self.node_secondary_ip = None
9722

    
9723
  @staticmethod
9724
  def CheckArguments(mode, remote_node, iallocator):
9725
    """Helper function for users of this class.
9726

9727
    """
9728
    # check for valid parameter combination
9729
    if mode == constants.REPLACE_DISK_CHG:
9730
      if remote_node is None and iallocator is None:
9731
        raise errors.OpPrereqError("When changing the secondary either an"
9732
                                   " iallocator script must be used or the"
9733
                                   " new node given", errors.ECODE_INVAL)
9734

    
9735
      if remote_node is not None and iallocator is not None:
9736
        raise errors.OpPrereqError("Give either the iallocator or the new"
9737
                                   " secondary, not both", errors.ECODE_INVAL)
9738

    
9739
    elif remote_node is not None or iallocator is not None:
9740
      # Not replacing the secondary
9741
      raise errors.OpPrereqError("The iallocator and new node options can"
9742
                                 " only be used when changing the"
9743
                                 " secondary node", errors.ECODE_INVAL)
9744
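  # Illustrative examples of the combinations validated above (node and
  # iallocator names are made up):
  #
  #   CheckArguments(constants.REPLACE_DISK_PRI, None, None)      -> accepted
  #   CheckArguments(constants.REPLACE_DISK_CHG, "node3", None)   -> accepted
  #   CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")    -> accepted
  #   CheckArguments(constants.REPLACE_DISK_CHG, "node3", "hail") -> OpPrereqError
  #   CheckArguments(constants.REPLACE_DISK_PRI, "node3", None)   -> OpPrereqError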

    
9745
  @staticmethod
9746
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9747
    """Compute a new secondary node using an IAllocator.
9748

9749
    """
9750
    ial = IAllocator(lu.cfg, lu.rpc,
9751
                     mode=constants.IALLOCATOR_MODE_RELOC,
9752
                     name=instance_name,
9753
                     relocate_from=list(relocate_from))
9754

    
9755
    ial.Run(iallocator_name)
9756

    
9757
    if not ial.success:
9758
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9759
                                 " %s" % (iallocator_name, ial.info),
9760
                                 errors.ECODE_NORES)
9761

    
9762
    if len(ial.result) != ial.required_nodes:
9763
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9764
                                 " of nodes (%s), required %s" %
9765
                                 (iallocator_name,
9766
                                  len(ial.result), ial.required_nodes),
9767
                                 errors.ECODE_FAULT)
9768

    
9769
    remote_node_name = ial.result[0]
9770

    
9771
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9772
               instance_name, remote_node_name)
9773

    
9774
    return remote_node_name
9775
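  # Example sketch with made-up names: relocating the secondary of
  # "inst1.example.com" away from "node2.example.com" builds a request
  # equivalent to
  #
  #   ial = IAllocator(lu.cfg, lu.rpc,
  #                    mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name="inst1.example.com",
  #                    relocate_from=["node2.example.com"])
  #   ial.Run("hail")
  #
  # and, on success, ial.result is a one-element list with the name of the
  # newly selected secondary node.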

    
9776
  def _FindFaultyDisks(self, node_name):
9777
    """Wrapper for L{_FindFaultyInstanceDisks}.
9778

9779
    """
9780
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9781
                                    node_name, True)
9782

    
9783
  def _CheckDisksActivated(self, instance):
9784
    """Checks if the instance disks are activated.
9785

9786
    @param instance: The instance to check disks
9787
    @return: True if they are activated, False otherwise
9788

9789
    """
9790
    nodes = instance.all_nodes
9791

    
9792
    for idx, dev in enumerate(instance.disks):
9793
      for node in nodes:
9794
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9795
        self.cfg.SetDiskID(dev, node)
9796

    
9797
        result = self.rpc.call_blockdev_find(node, dev)
9798

    
9799
        if result.offline:
9800
          continue
9801
        elif result.fail_msg or not result.payload:
9802
          return False
9803

    
9804
    return True
9805

    
9806
  def CheckPrereq(self):
9807
    """Check prerequisites.
9808

9809
    This checks that the instance is in the cluster.
9810

9811
    """
9812
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9813
    assert instance is not None, \
9814
      "Cannot retrieve locked instance %s" % self.instance_name
9815

    
9816
    if instance.disk_template != constants.DT_DRBD8:
9817
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9818
                                 " instances", errors.ECODE_INVAL)
9819

    
9820
    if len(instance.secondary_nodes) != 1:
9821
      raise errors.OpPrereqError("The instance has a strange layout,"
9822
                                 " expected one secondary but found %d" %
9823
                                 len(instance.secondary_nodes),
9824
                                 errors.ECODE_FAULT)
9825

    
9826
    if not self.delay_iallocator:
9827
      self._CheckPrereq2()
9828

    
9829
  def _CheckPrereq2(self):
9830
    """Check prerequisites, second part.
9831

9832
    This function should always be part of CheckPrereq. It was separated and is
9833
    now called from Exec because during node evacuation iallocator was only
9834
    called with an unmodified cluster model, not taking planned changes into
9835
    account.
9836

9837
    """
9838
    instance = self.instance
9839
    secondary_node = instance.secondary_nodes[0]
9840

    
9841
    if self.iallocator_name is None:
9842
      remote_node = self.remote_node
9843
    else:
9844
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9845
                                       instance.name, instance.secondary_nodes)
9846

    
9847
    if remote_node is None:
9848
      self.remote_node_info = None
9849
    else:
9850
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9851
             "Remote node '%s' is not locked" % remote_node
9852

    
9853
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9854
      assert self.remote_node_info is not None, \
9855
        "Cannot retrieve locked node %s" % remote_node
9856

    
9857
    if remote_node == self.instance.primary_node:
9858
      raise errors.OpPrereqError("The specified node is the primary node of"
9859
                                 " the instance", errors.ECODE_INVAL)
9860

    
9861
    if remote_node == secondary_node:
9862
      raise errors.OpPrereqError("The specified node is already the"
9863
                                 " secondary node of the instance",
9864
                                 errors.ECODE_INVAL)
9865

    
9866
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9867
                                    constants.REPLACE_DISK_CHG):
9868
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9869
                                 errors.ECODE_INVAL)
9870

    
9871
    if self.mode == constants.REPLACE_DISK_AUTO:
9872
      if not self._CheckDisksActivated(instance):
9873
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9874
                                   " first" % self.instance_name,
9875
                                   errors.ECODE_STATE)
9876
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9877
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9878

    
9879
      if faulty_primary and faulty_secondary:
9880
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9881
                                   " one node and can not be repaired"
9882
                                   " automatically" % self.instance_name,
9883
                                   errors.ECODE_STATE)
9884

    
9885
      if faulty_primary:
9886
        self.disks = faulty_primary
9887
        self.target_node = instance.primary_node
9888
        self.other_node = secondary_node
9889
        check_nodes = [self.target_node, self.other_node]
9890
      elif faulty_secondary:
9891
        self.disks = faulty_secondary
9892
        self.target_node = secondary_node
9893
        self.other_node = instance.primary_node
9894
        check_nodes = [self.target_node, self.other_node]
9895
      else:
9896
        self.disks = []
9897
        check_nodes = []
9898

    
9899
    else:
9900
      # Non-automatic modes
9901
      if self.mode == constants.REPLACE_DISK_PRI:
9902
        self.target_node = instance.primary_node
9903
        self.other_node = secondary_node
9904
        check_nodes = [self.target_node, self.other_node]
9905

    
9906
      elif self.mode == constants.REPLACE_DISK_SEC:
9907
        self.target_node = secondary_node
9908
        self.other_node = instance.primary_node
9909
        check_nodes = [self.target_node, self.other_node]
9910

    
9911
      elif self.mode == constants.REPLACE_DISK_CHG:
9912
        self.new_node = remote_node
9913
        self.other_node = instance.primary_node
9914
        self.target_node = secondary_node
9915
        check_nodes = [self.new_node, self.other_node]
9916

    
9917
        _CheckNodeNotDrained(self.lu, remote_node)
9918
        _CheckNodeVmCapable(self.lu, remote_node)
9919

    
9920
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9921
        assert old_node_info is not None
9922
        if old_node_info.offline and not self.early_release:
9923
          # doesn't make sense to delay the release
9924
          self.early_release = True
9925
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9926
                          " early-release mode", secondary_node)
9927

    
9928
      else:
9929
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9930
                                     self.mode)
9931

    
9932
      # If not specified all disks should be replaced
9933
      if not self.disks:
9934
        self.disks = range(len(self.instance.disks))
9935

    
9936
    for node in check_nodes:
9937
      _CheckNodeOnline(self.lu, node)
9938

    
9939
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9940
                                                          self.other_node,
9941
                                                          self.target_node]
9942
                              if node_name is not None)
9943

    
9944
    # Release unneeded node and node resource locks
9945
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9946
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
9947

    
9948
    # Release any owned node group
9949
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9950
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9951

    
9952
    # Check whether disks are valid
9953
    for disk_idx in self.disks:
9954
      instance.FindDisk(disk_idx)
9955

    
9956
    # Get secondary node IP addresses
9957
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9958
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9959

    
9960
  def Exec(self, feedback_fn):
9961
    """Execute disk replacement.
9962

9963
    This dispatches the disk replacement to the appropriate handler.
9964

9965
    """
9966
    if self.delay_iallocator:
9967
      self._CheckPrereq2()
9968

    
9969
    if __debug__:
9970
      # Verify owned locks before starting operation
9971
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9972
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9973
          ("Incorrect node locks, owning %s, expected %s" %
9974
           (owned_nodes, self.node_secondary_ip.keys()))
9975
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
9976
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
9977

    
9978
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9979
      assert list(owned_instances) == [self.instance_name], \
9980
          "Instance '%s' not locked" % self.instance_name
9981

    
9982
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9983
          "Should not own any node group lock at this point"
9984

    
9985
    if not self.disks:
9986
      feedback_fn("No disks need replacement")
9987
      return
9988

    
9989
    feedback_fn("Replacing disk(s) %s for %s" %
9990
                (utils.CommaJoin(self.disks), self.instance.name))
9991

    
9992
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
9993

    
9994
    # Activate the instance disks if we're replacing them on a down instance
9995
    if activate_disks:
9996
      _StartInstanceDisks(self.lu, self.instance, True)
9997

    
9998
    try:
9999
      # Should we replace the secondary node?
10000
      if self.new_node is not None:
10001
        fn = self._ExecDrbd8Secondary
10002
      else:
10003
        fn = self._ExecDrbd8DiskOnly
10004

    
10005
      result = fn(feedback_fn)
10006
    finally:
10007
      # Deactivate the instance disks if we're replacing them on a
10008
      # down instance
10009
      if activate_disks:
10010
        _SafeShutdownInstanceDisks(self.lu, self.instance)
10011

    
10012
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
10013

    
10014
    if __debug__:
10015
      # Verify owned locks
10016
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10017
      nodes = frozenset(self.node_secondary_ip)
10018
      assert ((self.early_release and not owned_nodes) or
10019
              (not self.early_release and not (set(owned_nodes) - nodes))), \
10020
        ("Not owning the correct locks, early_release=%s, owned=%r,"
10021
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
10022

    
10023
    return result
10024

    
10025
  def _CheckVolumeGroup(self, nodes):
10026
    self.lu.LogInfo("Checking volume groups")
10027

    
10028
    vgname = self.cfg.GetVGName()
10029

    
10030
    # Make sure volume group exists on all involved nodes
10031
    results = self.rpc.call_vg_list(nodes)
10032
    if not results:
10033
      raise errors.OpExecError("Can't list volume groups on the nodes")
10034

    
10035
    for node in nodes:
10036
      res = results[node]
10037
      res.Raise("Error checking node %s" % node)
10038
      if vgname not in res.payload:
10039
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
10040
                                 (vgname, node))
10041

    
10042
  def _CheckDisksExistence(self, nodes):
10043
    # Check disk existence
10044
    for idx, dev in enumerate(self.instance.disks):
10045
      if idx not in self.disks:
10046
        continue
10047

    
10048
      for node in nodes:
10049
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10050
        self.cfg.SetDiskID(dev, node)
10051

    
10052
        result = self.rpc.call_blockdev_find(node, dev)
10053

    
10054
        msg = result.fail_msg
10055
        if msg or not result.payload:
10056
          if not msg:
10057
            msg = "disk not found"
10058
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10059
                                   (idx, node, msg))
10060

    
10061
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10062
    for idx, dev in enumerate(self.instance.disks):
10063
      if idx not in self.disks:
10064
        continue
10065

    
10066
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10067
                      (idx, node_name))
10068

    
10069
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10070
                                   ldisk=ldisk):
10071
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10072
                                 " replace disks for instance %s" %
10073
                                 (node_name, self.instance.name))
10074

    
10075
  def _CreateNewStorage(self, node_name):
10076
    """Create new storage on the primary or secondary node.
10077

10078
    This is only used for same-node replaces, not for changing the
10079
    secondary node, hence we don't want to modify the existing disk.
10080

10081
    """
10082
    iv_names = {}
10083

    
10084
    for idx, dev in enumerate(self.instance.disks):
10085
      if idx not in self.disks:
10086
        continue
10087

    
10088
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10089

    
10090
      self.cfg.SetDiskID(dev, node_name)
10091

    
10092
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10093
      names = _GenerateUniqueNames(self.lu, lv_names)
10094

    
10095
      vg_data = dev.children[0].logical_id[0]
10096
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10097
                             logical_id=(vg_data, names[0]))
10098
      vg_meta = dev.children[1].logical_id[0]
10099
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10100
                             logical_id=(vg_meta, names[1]))
10101

    
10102
      new_lvs = [lv_data, lv_meta]
10103
      old_lvs = [child.Copy() for child in dev.children]
10104
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10105

    
10106
      # we pass force_create=True to force the LVM creation
10107
      for new_lv in new_lvs:
10108
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10109
                        _GetInstanceInfoText(self.instance), False)
10110

    
10111
    return iv_names
10112

    
10113
  def _CheckDevices(self, node_name, iv_names):
10114
    for name, (dev, _, _) in iv_names.iteritems():
10115
      self.cfg.SetDiskID(dev, node_name)
10116

    
10117
      result = self.rpc.call_blockdev_find(node_name, dev)
10118

    
10119
      msg = result.fail_msg
10120
      if msg or not result.payload:
10121
        if not msg:
10122
          msg = "disk not found"
10123
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10124
                                 (name, msg))
10125

    
10126
      if result.payload.is_degraded:
10127
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10128

    
10129
  def _RemoveOldStorage(self, node_name, iv_names):
10130
    for name, (_, old_lvs, _) in iv_names.iteritems():
10131
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10132

    
10133
      for lv in old_lvs:
10134
        self.cfg.SetDiskID(lv, node_name)
10135

    
10136
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10137
        if msg:
10138
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10139
                             hint="remove unused LVs manually")
10140

    
10141
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10142
    """Replace a disk on the primary or secondary for DRBD 8.
10143

10144
    The algorithm for replace is quite complicated:
10145

10146
      1. for each disk to be replaced:
10147

10148
        1. create new LVs on the target node with unique names
10149
        1. detach old LVs from the drbd device
10150
        1. rename old LVs to name_replaced.<time_t>
10151
        1. rename new LVs to old LVs
10152
        1. attach the new LVs (with the old names now) to the drbd device
10153

10154
      1. wait for sync across all devices
10155

10156
      1. for each modified disk:
10157

10158
        1. remove old LVs (which have the name name_replaced.<time_t>)
10159

10160
    Failures are not very well handled.
10161

10162
    """
10163
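    # Illustrative walk-through of the rename dance above for a single data
    # LV, with made-up volume names (the real names come from
    # _GenerateUniqueNames and the current timestamp):
    #
    #   existing LV : xenvg/aaaa.disk0_data   (attached to the DRBD device)
    #   1. create   : xenvg/bbbb.disk0_data   (the new storage)
    #   2. detach   : aaaa.disk0_data from the DRBD device
    #   3. rename   : aaaa.disk0_data -> aaaa.disk0_data_replaced-<time_t>
    #   4. rename   : bbbb.disk0_data -> aaaa.disk0_data
    #   5. attach   : aaaa.disk0_data (now the new LV) back to the DRBD device
    #
    # Once resync has finished, the "_replaced-<time_t>" LVs are removed.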
    steps_total = 6
10164

    
10165
    # Step: check device activation
10166
    self.lu.LogStep(1, steps_total, "Check device existence")
10167
    self._CheckDisksExistence([self.other_node, self.target_node])
10168
    self._CheckVolumeGroup([self.target_node, self.other_node])
10169

    
10170
    # Step: check other node consistency
10171
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10172
    self._CheckDisksConsistency(self.other_node,
10173
                                self.other_node == self.instance.primary_node,
10174
                                False)
10175

    
10176
    # Step: create new storage
10177
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10178
    iv_names = self._CreateNewStorage(self.target_node)
10179

    
10180
    # Step: for each lv, detach+rename*2+attach
10181
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10182
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10183
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10184

    
10185
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10186
                                                     old_lvs)
10187
      result.Raise("Can't detach drbd from local storage on node"
10188
                   " %s for device %s" % (self.target_node, dev.iv_name))
10189
      #dev.children = []
10190
      #cfg.Update(instance)
10191

    
10192
      # ok, we created the new LVs, so now we know we have the needed
10193
      # storage; as such, we proceed on the target node to rename
10194
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10195
      # using the assumption that logical_id == physical_id (which in
10196
      # turn is the unique_id on that node)
10197

    
10198
      # FIXME(iustin): use a better name for the replaced LVs
10199
      temp_suffix = int(time.time())
10200
      ren_fn = lambda d, suff: (d.physical_id[0],
10201
                                d.physical_id[1] + "_replaced-%s" % suff)
10202

    
10203
      # Build the rename list based on what LVs exist on the node
10204
      rename_old_to_new = []
10205
      for to_ren in old_lvs:
10206
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10207
        if not result.fail_msg and result.payload:
10208
          # device exists
10209
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10210

    
10211
      self.lu.LogInfo("Renaming the old LVs on the target node")
10212
      result = self.rpc.call_blockdev_rename(self.target_node,
10213
                                             rename_old_to_new)
10214
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10215

    
10216
      # Now we rename the new LVs to the old LVs
10217
      self.lu.LogInfo("Renaming the new LVs on the target node")
10218
      rename_new_to_old = [(new, old.physical_id)
10219
                           for old, new in zip(old_lvs, new_lvs)]
10220
      result = self.rpc.call_blockdev_rename(self.target_node,
10221
                                             rename_new_to_old)
10222
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10223

    
10224
      # Intermediate steps of in memory modifications
10225
      for old, new in zip(old_lvs, new_lvs):
10226
        new.logical_id = old.logical_id
10227
        self.cfg.SetDiskID(new, self.target_node)
10228

    
10229
      # We need to modify old_lvs so that removal later removes the
10230
      # right LVs, not the newly added ones; note that old_lvs is a
10231
      # copy here
10232
      for disk in old_lvs:
10233
        disk.logical_id = ren_fn(disk, temp_suffix)
10234
        self.cfg.SetDiskID(disk, self.target_node)
10235

    
10236
      # Now that the new lvs have the old name, we can add them to the device
10237
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10238
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10239
                                                  new_lvs)
10240
      msg = result.fail_msg
10241
      if msg:
10242
        for new_lv in new_lvs:
10243
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10244
                                               new_lv).fail_msg
10245
          if msg2:
10246
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10247
                               hint=("cleanup manually the unused logical"
10248
                                     "volumes"))
10249
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10250

    
10251
    cstep = itertools.count(5)
10252

    
10253
    if self.early_release:
10254
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10255
      self._RemoveOldStorage(self.target_node, iv_names)
10256
      # TODO: Check if releasing locks early still makes sense
10257
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10258
    else:
10259
      # Release all resource locks except those used by the instance
10260
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10261
                    keep=self.node_secondary_ip.keys())
10262

    
10263
    # Release all node locks while waiting for sync
10264
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10265

    
10266
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10267
    # shutdown in the caller into consideration.
10268

    
10269
    # Wait for sync
10270
    # This can fail as the old devices are degraded and _WaitForSync
10271
    # does a combined result over all disks, so we don't check its return value
10272
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10273
    _WaitForSync(self.lu, self.instance)
10274

    
10275
    # Check all devices manually
10276
    self._CheckDevices(self.instance.primary_node, iv_names)
10277

    
10278
    # Step: remove old storage
10279
    if not self.early_release:
10280
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10281
      self._RemoveOldStorage(self.target_node, iv_names)
10282

    
10283
  def _ExecDrbd8Secondary(self, feedback_fn):
10284
    """Replace the secondary node for DRBD 8.
10285

10286
    The algorithm for replace is quite complicated:
10287
      - for all disks of the instance:
10288
        - create new LVs on the new node with same names
10289
        - shutdown the drbd device on the old secondary
10290
        - disconnect the drbd network on the primary
10291
        - create the drbd device on the new secondary
10292
        - network attach the drbd on the primary, using an artifice:
10293
          the drbd code for Attach() will connect to the network if it
10294
          finds a device which is connected to the correct local disks but
10295
          not network enabled
10296
      - wait for sync across all devices
10297
      - remove all disks from the old secondary
10298

10299
    Failures are not very well handled.
10300

10301
    """
10302
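    # Illustrative sketch of the per-disk logical_id change performed below,
    # with made-up node names, port and minors (the shared secret is elided):
    #
    #   before: ("node1", "node2", 11000, 0, 3, <secret>)   # old secondary
    #   after : ("node1", "node3", 11000, 0, 7, <secret>)   # new secondary
    #
    # Only the secondary node and its DRBD minor change; the primary node and
    # minor, the port and the secret are kept.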
    steps_total = 6
10303

    
10304
    pnode = self.instance.primary_node
10305

    
10306
    # Step: check device activation
10307
    self.lu.LogStep(1, steps_total, "Check device existence")
10308
    self._CheckDisksExistence([self.instance.primary_node])
10309
    self._CheckVolumeGroup([self.instance.primary_node])
10310

    
10311
    # Step: check other node consistency
10312
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10313
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10314

    
10315
    # Step: create new storage
10316
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10317
    for idx, dev in enumerate(self.instance.disks):
10318
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10319
                      (self.new_node, idx))
10320
      # we pass force_create=True to force LVM creation
10321
      for new_lv in dev.children:
10322
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10323
                        _GetInstanceInfoText(self.instance), False)
10324

    
10325
    # Step 4: drbd minors and drbd setup changes
10326
    # after this, we must manually remove the drbd minors on both the
10327
    # error and the success paths
10328
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10329
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10330
                                         for dev in self.instance.disks],
10331
                                        self.instance.name)
10332
    logging.debug("Allocated minors %r", minors)
10333

    
10334
    iv_names = {}
10335
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10336
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10337
                      (self.new_node, idx))
10338
      # create new devices on new_node; note that we create two IDs:
10339
      # one without port, so the drbd will be activated without
10340
      # networking information on the new node at this stage, and one
10341
      # with network, for the later activation in step 4
10342
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10343
      if self.instance.primary_node == o_node1:
10344
        p_minor = o_minor1
10345
      else:
10346
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10347
        p_minor = o_minor2
10348

    
10349
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10350
                      p_minor, new_minor, o_secret)
10351
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10352
                    p_minor, new_minor, o_secret)
10353

    
10354
      iv_names[idx] = (dev, dev.children, new_net_id)
10355
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10356
                    new_net_id)
10357
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10358
                              logical_id=new_alone_id,
10359
                              children=dev.children,
10360
                              size=dev.size)
10361
      try:
10362
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10363
                              _GetInstanceInfoText(self.instance), False)
10364
      except errors.GenericError:
10365
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10366
        raise
10367

    
10368
    # We have new devices, shutdown the drbd on the old secondary
10369
    for idx, dev in enumerate(self.instance.disks):
10370
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10371
      self.cfg.SetDiskID(dev, self.target_node)
10372
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10373
      if msg:
10374
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10375
                           "node: %s" % (idx, msg),
10376
                           hint=("Please cleanup this device manually as"
10377
                                 " soon as possible"))
10378

    
10379
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10380
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10381
                                               self.instance.disks)[pnode]
10382

    
10383
    msg = result.fail_msg
10384
    if msg:
10385
      # detaches didn't succeed (unlikely)
10386
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10387
      raise errors.OpExecError("Can't detach the disks from the network on"
10388
                               " old node: %s" % (msg,))
10389

    
10390
    # if we managed to detach at least one, we update all the disks of
10391
    # the instance to point to the new secondary
10392
    self.lu.LogInfo("Updating instance configuration")
10393
    for dev, _, new_logical_id in iv_names.itervalues():
10394
      dev.logical_id = new_logical_id
10395
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10396

    
10397
    self.cfg.Update(self.instance, feedback_fn)
10398

    
10399
    # Release all node locks (the configuration has been updated)
10400
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10401

    
10402
    # and now perform the drbd attach
10403
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10404
                    " (standalone => connected)")
10405
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10406
                                            self.new_node],
10407
                                           self.node_secondary_ip,
10408
                                           self.instance.disks,
10409
                                           self.instance.name,
10410
                                           False)
10411
    for to_node, to_result in result.items():
10412
      msg = to_result.fail_msg
10413
      if msg:
10414
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10415
                           to_node, msg,
10416
                           hint=("please do a gnt-instance info to see the"
10417
                                 " status of disks"))
10418

    
10419
    cstep = itertools.count(5)
10420

    
10421
    if self.early_release:
10422
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10423
      self._RemoveOldStorage(self.target_node, iv_names)
10424
      # TODO: Check if releasing locks early still makes sense
10425
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10426
    else:
10427
      # Release all resource locks except those used by the instance
10428
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10429
                    keep=self.node_secondary_ip.keys())
10430

    
10431
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10432
    # shutdown in the caller into consideration.
10433

    
10434
    # Wait for sync
10435
    # This can fail as the old devices are degraded and _WaitForSync
10436
    # does a combined result over all disks, so we don't check its return value
10437
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10438
    _WaitForSync(self.lu, self.instance)
10439

    
10440
    # Check all devices manually
10441
    self._CheckDevices(self.instance.primary_node, iv_names)
10442

    
10443
    # Step: remove old storage
10444
    if not self.early_release:
10445
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10446
      self._RemoveOldStorage(self.target_node, iv_names)
10447

    
10448

    
10449
class LURepairNodeStorage(NoHooksLU):
10450
  """Repairs the volume group on a node.
10451

10452
  """
10453
  REQ_BGL = False
10454

    
10455
  def CheckArguments(self):
10456
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10457

    
10458
    storage_type = self.op.storage_type
10459

    
10460
    if (constants.SO_FIX_CONSISTENCY not in
10461
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10462
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10463
                                 " repaired" % storage_type,
10464
                                 errors.ECODE_INVAL)
10465

    
10466
  def ExpandNames(self):
10467
    self.needed_locks = {
10468
      locking.LEVEL_NODE: [self.op.node_name],
10469
      }
10470

    
10471
  def _CheckFaultyDisks(self, instance, node_name):
10472
    """Ensure faulty disks abort the opcode or at least warn."""
10473
    try:
10474
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10475
                                  node_name, True):
10476
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10477
                                   " node '%s'" % (instance.name, node_name),
10478
                                   errors.ECODE_STATE)
10479
    except errors.OpPrereqError, err:
10480
      if self.op.ignore_consistency:
10481
        self.proc.LogWarning(str(err.args[0]))
10482
      else:
10483
        raise
10484

    
10485
  def CheckPrereq(self):
10486
    """Check prerequisites.
10487

10488
    """
10489
    # Check whether any instance on this node has faulty disks
10490
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10491
      if inst.admin_state != constants.ADMINST_UP:
10492
        continue
10493
      check_nodes = set(inst.all_nodes)
10494
      check_nodes.discard(self.op.node_name)
10495
      for inst_node_name in check_nodes:
10496
        self._CheckFaultyDisks(inst, inst_node_name)
10497

    
10498
  def Exec(self, feedback_fn):
10499
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10500
                (self.op.name, self.op.node_name))
10501

    
10502
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10503
    result = self.rpc.call_storage_execute(self.op.node_name,
10504
                                           self.op.storage_type, st_args,
10505
                                           self.op.name,
10506
                                           constants.SO_FIX_CONSISTENCY)
10507
    result.Raise("Failed to repair storage unit '%s' on %s" %
10508
                 (self.op.name, self.op.node_name))
10509
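# Example sketch with made-up field values: a consistency repair of the LVM
# volume group on one node would be requested with an opcode roughly like
#
#   opcodes.OpRepairNodeStorage(node_name="node1.example.com",
#                               storage_type=constants.ST_LVM_VG,
#                               name="xenvg",
#                               ignore_consistency=False)
#
# which this LU forwards to the node daemon via call_storage_execute with the
# SO_FIX_CONSISTENCY operation.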

    
10510

    
10511
class LUNodeEvacuate(NoHooksLU):
10512
  """Evacuates instances off a list of nodes.
10513

10514
  """
10515
  REQ_BGL = False
10516

    
10517
  _MODE2IALLOCATOR = {
10518
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10519
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10520
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10521
    }
10522
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10523
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
10524
          constants.IALLOCATOR_NEVAC_MODES)
10525

    
10526
  def CheckArguments(self):
10527
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10528

    
10529
  def ExpandNames(self):
10530
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10531

    
10532
    if self.op.remote_node is not None:
10533
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10534
      assert self.op.remote_node
10535

    
10536
      if self.op.remote_node == self.op.node_name:
10537
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10538
                                   " secondary node", errors.ECODE_INVAL)
10539

    
10540
      if self.op.mode != constants.NODE_EVAC_SEC:
10541
        raise errors.OpPrereqError("Without the use of an iallocator only"
10542
                                   " secondary instances can be evacuated",
10543
                                   errors.ECODE_INVAL)
10544

    
10545
    # Declare locks
10546
    self.share_locks = _ShareAll()
10547
    self.needed_locks = {
10548
      locking.LEVEL_INSTANCE: [],
10549
      locking.LEVEL_NODEGROUP: [],
10550
      locking.LEVEL_NODE: [],
10551
      }
10552

    
10553
    # Determine nodes (via group) optimistically, needs verification once locks
10554
    # have been acquired
10555
    self.lock_nodes = self._DetermineNodes()
10556

    
10557
  def _DetermineNodes(self):
10558
    """Gets the list of nodes to operate on.
10559

10560
    """
10561
    if self.op.remote_node is None:
10562
      # Iallocator will choose any node(s) in the same group
10563
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10564
    else:
10565
      group_nodes = frozenset([self.op.remote_node])
10566

    
10567
    # Determine nodes to be locked
10568
    return set([self.op.node_name]) | group_nodes
10569

    
10570
  def _DetermineInstances(self):
10571
    """Builds list of instances to operate on.
10572

10573
    """
10574
    assert self.op.mode in constants.NODE_EVAC_MODES
10575

    
10576
    if self.op.mode == constants.NODE_EVAC_PRI:
10577
      # Primary instances only
10578
      inst_fn = _GetNodePrimaryInstances
10579
      assert self.op.remote_node is None, \
10580
        "Evacuating primary instances requires iallocator"
10581
    elif self.op.mode == constants.NODE_EVAC_SEC:
10582
      # Secondary instances only
10583
      inst_fn = _GetNodeSecondaryInstances
10584
    else:
10585
      # All instances
10586
      assert self.op.mode == constants.NODE_EVAC_ALL
10587
      inst_fn = _GetNodeInstances
10588
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10589
      # per instance
10590
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10591
                                 " interface it is not possible to evacuate"
10592
                                 " all instances at once; specify explicitly"
10593
                                 " whether to evacuate primary or secondary"
10594
                                 " instances",
10595
                                 errors.ECODE_INVAL)
10596

    
10597
    return inst_fn(self.cfg, self.op.node_name)
10598

    
10599
  def DeclareLocks(self, level):
10600
    if level == locking.LEVEL_INSTANCE:
10601
      # Lock instances optimistically, needs verification once node and group
10602
      # locks have been acquired
10603
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10604
        set(i.name for i in self._DetermineInstances())
10605

    
10606
    elif level == locking.LEVEL_NODEGROUP:
10607
      # Lock node groups for all potential target nodes optimistically, needs
10608
      # verification once nodes have been acquired
10609
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10610
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10611

    
10612
    elif level == locking.LEVEL_NODE:
10613
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10614

    
10615
  def CheckPrereq(self):
10616
    # Verify locks
10617
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10618
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10619
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10620

    
10621
    need_nodes = self._DetermineNodes()
10622

    
10623
    if not owned_nodes.issuperset(need_nodes):
10624
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10625
                                 " locks were acquired, current nodes are"
10626
                                 " are '%s', used to be '%s'; retry the"
10627
                                 " operation" %
10628
                                 (self.op.node_name,
10629
                                  utils.CommaJoin(need_nodes),
10630
                                  utils.CommaJoin(owned_nodes)),
10631
                                 errors.ECODE_STATE)
10632

    
10633
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10634
    if owned_groups != wanted_groups:
10635
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10636
                               " current groups are '%s', used to be '%s';"
10637
                               " retry the operation" %
10638
                               (utils.CommaJoin(wanted_groups),
10639
                                utils.CommaJoin(owned_groups)))
10640

    
10641
    # Determine affected instances
10642
    self.instances = self._DetermineInstances()
10643
    self.instance_names = [i.name for i in self.instances]
10644

    
10645
    if set(self.instance_names) != owned_instances:
10646
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10647
                               " were acquired, current instances are '%s',"
10648
                               " used to be '%s'; retry the operation" %
10649
                               (self.op.node_name,
10650
                                utils.CommaJoin(self.instance_names),
10651
                                utils.CommaJoin(owned_instances)))
10652

    
10653
    if self.instance_names:
10654
      self.LogInfo("Evacuating instances from node '%s': %s",
10655
                   self.op.node_name,
10656
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10657
    else:
10658
      self.LogInfo("No instances to evacuate from node '%s'",
10659
                   self.op.node_name)
10660

    
10661
    if self.op.remote_node is not None:
10662
      for i in self.instances:
10663
        if i.primary_node == self.op.remote_node:
10664
          raise errors.OpPrereqError("Node %s is the primary node of"
10665
                                     " instance %s, cannot use it as"
10666
                                     " secondary" %
10667
                                     (self.op.remote_node, i.name),
10668
                                     errors.ECODE_INVAL)
10669

    
10670
  def Exec(self, feedback_fn):
10671
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10672

    
10673
    if not self.instance_names:
10674
      # No instances to evacuate
10675
      jobs = []
10676

    
10677
    elif self.op.iallocator is not None:
10678
      # TODO: Implement relocation to other group
10679
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10680
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10681
                       instances=list(self.instance_names))
10682

    
10683
      ial.Run(self.op.iallocator)
10684

    
10685
      if not ial.success:
10686
        raise errors.OpPrereqError("Can't compute node evacuation using"
10687
                                   " iallocator '%s': %s" %
10688
                                   (self.op.iallocator, ial.info),
10689
                                   errors.ECODE_NORES)
10690

    
10691
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10692

    
10693
    elif self.op.remote_node is not None:
10694
      assert self.op.mode == constants.NODE_EVAC_SEC
10695
      jobs = [
10696
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10697
                                        remote_node=self.op.remote_node,
10698
                                        disks=[],
10699
                                        mode=constants.REPLACE_DISK_CHG,
10700
                                        early_release=self.op.early_release)]
10701
        for instance_name in self.instance_names
10702
        ]
10703

    
10704
    else:
10705
      raise errors.ProgrammerError("No iallocator or remote node")
10706

    
10707
    return ResultWithJobs(jobs)
10708
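# Illustrative sketch (instance and node names made up): with remote_node set,
# the LU above returns one single-opcode job per evacuated secondary instance,
# e.g.
#
#   ResultWithJobs([
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                     remote_node="node9.example.com",
#                                     disks=[],
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     early_release=False)],
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst2.example.com",
#                                     remote_node="node9.example.com",
#                                     disks=[],
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     early_release=False)],
#     ])
#
# so each instance's disk replacement runs as an independent job.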

    
10709

    
10710
def _SetOpEarlyRelease(early_release, op):
10711
  """Sets C{early_release} flag on opcodes if available.
10712

10713
  """
10714
  try:
10715
    op.early_release = early_release
10716
  except AttributeError:
10717
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10718

    
10719
  return op
10720

    
10721

    
10722
def _NodeEvacDest(use_nodes, group, nodes):
10723
  """Returns group or nodes depending on caller's choice.
10724

10725
  """
10726
  if use_nodes:
10727
    return utils.CommaJoin(nodes)
10728
  else:
10729
    return group
10730

    
10731

    
10732
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10733
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10734

10735
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10736
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10737

10738
  @type lu: L{LogicalUnit}
10739
  @param lu: Logical unit instance
10740
  @type alloc_result: tuple/list
10741
  @param alloc_result: Result from iallocator
10742
  @type early_release: bool
10743
  @param early_release: Whether to release locks early if possible
10744
  @type use_nodes: bool
10745
  @param use_nodes: Whether to display node names instead of groups
10746

10747
  """
10748
  (moved, failed, jobs) = alloc_result
10749

    
10750
  if failed:
10751
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10752
                                 for (name, reason) in failed)
10753
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10754
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10755

    
10756
  if moved:
10757
    lu.LogInfo("Instances to be moved: %s",
10758
               utils.CommaJoin("%s (to %s)" %
10759
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10760
                               for (name, group, nodes) in moved))
10761

    
10762
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10763
              map(opcodes.OpCode.LoadOpCode, ops))
10764
          for ops in jobs]
10765
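# Illustrative sketch of the alloc_result structure unpacked above (instance,
# group and node names are made up; the serialized opcode dicts are
# abbreviated):
#
#   alloc_result = (
#     [("inst1.example.com", "group1", ["node2.example.com"])],  # moved
#     [("inst2.example.com", "disk/0 is faulty")],               # failed
#     [[{"OP_ID": "OP_INSTANCE_REPLACE_DISKS"}]],                # jobs
#     )
#
# Each inner list under "jobs" becomes one job whose opcodes are rebuilt with
# opcodes.OpCode.LoadOpCode and get the early_release flag applied via
# _SetOpEarlyRelease.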

    
10766

    
10767
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


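# Example (illustrative sketch; the opcode name and values are assumptions):
# growing disk 0 of an instance by 1 GiB would typically be requested through
# the matching opcode, here assumed to be opcodes.OpInstanceGrowDisk, after
# which the LU above performs a dry-run grow, the real grow and the optional
# wait for sync:
#
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0, amount=1024,
#                                   wait_for_sync=True)
#
# The amount is a size in mebibytes, as suggested by the use of
# utils.FormatUnit(self.op.amount, "h") in Exec above.
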
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


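# Example (illustrative sketch; all values are hypothetical): the dictionary
# returned by LUInstanceQueryData.Exec maps each instance name to its runtime
# description, roughly:
#
#   {
#     "inst1.example.com": {
#       "name": "inst1.example.com",
#       "config_state": "up",            # instance.admin_state
#       "run_state": "up",               # or "down"/admin_state when static
#       "pnode": "node1.example.com",
#       "snodes": ["node2.example.com"],
#       "disk_template": "drbd",
#       "disks": [...],                  # per-disk dicts, see _ComputeDiskStatus
#       ...
#       },
#     }
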
class LUInstanceSetParams(LogicalUnit):
11068
  """Modifies an instances's parameters.
11069

11070
  """
11071
  HPATH = "instance-modify"
11072
  HTYPE = constants.HTYPE_INSTANCE
11073
  REQ_BGL = False
11074

    
11075
  def CheckArguments(self):
11076
    if not (self.op.nics or self.op.disks or self.op.disk_template or
11077
            self.op.hvparams or self.op.beparams or self.op.os_name or
11078
            self.op.online_inst or self.op.offline_inst):
11079
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11080

    
11081
    if self.op.hvparams:
11082
      _CheckGlobalHvParams(self.op.hvparams)
11083

    
11084
    # Disk validation
11085
    disk_addremove = 0
11086
    for disk_op, disk_dict in self.op.disks:
11087
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11088
      if disk_op == constants.DDM_REMOVE:
11089
        disk_addremove += 1
11090
        continue
11091
      elif disk_op == constants.DDM_ADD:
11092
        disk_addremove += 1
11093
      else:
11094
        if not isinstance(disk_op, int):
11095
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11096
        if not isinstance(disk_dict, dict):
11097
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11098
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11099

    
11100
      if disk_op == constants.DDM_ADD:
11101
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11102
        if mode not in constants.DISK_ACCESS_SET:
11103
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11104
                                     errors.ECODE_INVAL)
11105
        size = disk_dict.get(constants.IDISK_SIZE, None)
11106
        if size is None:
11107
          raise errors.OpPrereqError("Required disk parameter size missing",
11108
                                     errors.ECODE_INVAL)
11109
        try:
11110
          size = int(size)
11111
        except (TypeError, ValueError), err:
11112
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11113
                                     str(err), errors.ECODE_INVAL)
11114
        disk_dict[constants.IDISK_SIZE] = size
11115
      else:
11116
        # modification of disk
11117
        if constants.IDISK_SIZE in disk_dict:
11118
          raise errors.OpPrereqError("Disk size change not possible, use"
11119
                                     " grow-disk", errors.ECODE_INVAL)
11120

    
11121
    if disk_addremove > 1:
11122
      raise errors.OpPrereqError("Only one disk add or remove operation"
11123
                                 " supported at a time", errors.ECODE_INVAL)
11124

    
11125
    if self.op.disks and self.op.disk_template is not None:
11126
      raise errors.OpPrereqError("Disk template conversion and other disk"
11127
                                 " changes not supported at the same time",
11128
                                 errors.ECODE_INVAL)
11129

    
11130
    if (self.op.disk_template and
11131
        self.op.disk_template in constants.DTS_INT_MIRROR and
11132
        self.op.remote_node is None):
11133
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
11134
                                 " one requires specifying a secondary node",
11135
                                 errors.ECODE_INVAL)
11136

    
11137
    # NIC validation
11138
    nic_addremove = 0
11139
    for nic_op, nic_dict in self.op.nics:
11140
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11141
      if nic_op == constants.DDM_REMOVE:
11142
        nic_addremove += 1
11143
        continue
11144
      elif nic_op == constants.DDM_ADD:
11145
        nic_addremove += 1
11146
      else:
11147
        if not isinstance(nic_op, int):
11148
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11149
        if not isinstance(nic_dict, dict):
11150
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11151
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11152

    
11153
      # nic_dict should be a dict
11154
      nic_ip = nic_dict.get(constants.INIC_IP, None)
11155
      if nic_ip is not None:
11156
        if nic_ip.lower() == constants.VALUE_NONE:
11157
          nic_dict[constants.INIC_IP] = None
11158
        else:
11159
          if not netutils.IPAddress.IsValid(nic_ip):
11160
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11161
                                       errors.ECODE_INVAL)
11162

    
11163
      nic_bridge = nic_dict.get("bridge", None)
11164
      nic_link = nic_dict.get(constants.INIC_LINK, None)
11165
      if nic_bridge and nic_link:
11166
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11167
                                   " at the same time", errors.ECODE_INVAL)
11168
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11169
        nic_dict["bridge"] = None
11170
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11171
        nic_dict[constants.INIC_LINK] = None
11172

    
11173
      if nic_op == constants.DDM_ADD:
11174
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
11175
        if nic_mac is None:
11176
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11177

    
11178
      if constants.INIC_MAC in nic_dict:
11179
        nic_mac = nic_dict[constants.INIC_MAC]
11180
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11181
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11182

    
11183
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11184
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11185
                                     " modifying an existing nic",
11186
                                     errors.ECODE_INVAL)
11187

    
11188
    if nic_addremove > 1:
11189
      raise errors.OpPrereqError("Only one NIC add or remove operation"
11190
                                 " supported at a time", errors.ECODE_INVAL)
11191

    
11192
  def ExpandNames(self):
11193
    self._ExpandAndLockInstance()
11194
    # Can't even acquire node locks in shared mode as upcoming changes in
11195
    # Ganeti 2.6 will start to modify the node object on disk conversion
11196
    self.needed_locks[locking.LEVEL_NODE] = []
11197
    self.needed_locks[locking.LEVEL_NODE_RES] = []
11198
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11199

    
11200
  def DeclareLocks(self, level):
11201
    if level == locking.LEVEL_NODE:
11202
      self._LockInstancesNodes()
11203
      if self.op.disk_template and self.op.remote_node:
11204
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11205
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11206
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11207
      # Copy node locks
11208
      self.needed_locks[locking.LEVEL_NODE_RES] = \
11209
        self.needed_locks[locking.LEVEL_NODE][:]
11210

    
11211
  def BuildHooksEnv(self):
11212
    """Build hooks env.
11213

11214
    This runs on the master, primary and secondaries.
11215

11216
    """
11217
    args = dict()
11218
    if constants.BE_MINMEM in self.be_new:
11219
      args["minmem"] = self.be_new[constants.BE_MINMEM]
11220
    if constants.BE_MAXMEM in self.be_new:
11221
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11222
    if constants.BE_VCPUS in self.be_new:
11223
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11224
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11225
    # information at all.
11226
    if self.op.nics:
11227
      args["nics"] = []
11228
      nic_override = dict(self.op.nics)
11229
      for idx, nic in enumerate(self.instance.nics):
11230
        if idx in nic_override:
11231
          this_nic_override = nic_override[idx]
11232
        else:
11233
          this_nic_override = {}
11234
        if constants.INIC_IP in this_nic_override:
11235
          ip = this_nic_override[constants.INIC_IP]
11236
        else:
11237
          ip = nic.ip
11238
        if constants.INIC_MAC in this_nic_override:
11239
          mac = this_nic_override[constants.INIC_MAC]
11240
        else:
11241
          mac = nic.mac
11242
        if idx in self.nic_pnew:
11243
          nicparams = self.nic_pnew[idx]
11244
        else:
11245
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11246
        mode = nicparams[constants.NIC_MODE]
11247
        link = nicparams[constants.NIC_LINK]
11248
        args["nics"].append((ip, mac, mode, link))
11249
      if constants.DDM_ADD in nic_override:
11250
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11251
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11252
        nicparams = self.nic_pnew[constants.DDM_ADD]
11253
        mode = nicparams[constants.NIC_MODE]
11254
        link = nicparams[constants.NIC_LINK]
11255
        args["nics"].append((ip, mac, mode, link))
11256
      elif constants.DDM_REMOVE in nic_override:
11257
        del args["nics"][-1]
11258

    
11259
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11260
    if self.op.disk_template:
11261
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11262

    
11263
    return env
11264

    
11265
  def BuildHooksNodes(self):
11266
    """Build hooks nodes.
11267

11268
    """
11269
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11270
    return (nl, nl)
11271

    
11272
  def CheckPrereq(self):
11273
    """Check prerequisites.
11274

11275
    This only checks the instance list against the existing names.
11276

11277
    """
11278
    # checking the new params on the primary/secondary nodes
11279

    
11280
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11281
    cluster = self.cluster = self.cfg.GetClusterInfo()
11282
    assert self.instance is not None, \
11283
      "Cannot retrieve locked instance %s" % self.op.instance_name
11284
    pnode = instance.primary_node
11285
    nodelist = list(instance.all_nodes)
11286

    
11287
    # OS change
11288
    if self.op.os_name and not self.op.force:
11289
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11290
                      self.op.force_variant)
11291
      instance_os = self.op.os_name
11292
    else:
11293
      instance_os = instance.os
11294

    
11295
    if self.op.disk_template:
11296
      if instance.disk_template == self.op.disk_template:
11297
        raise errors.OpPrereqError("Instance already has disk template %s" %
11298
                                   instance.disk_template, errors.ECODE_INVAL)
11299

    
11300
      if (instance.disk_template,
11301
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11302
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11303
                                   " %s to %s" % (instance.disk_template,
11304
                                                  self.op.disk_template),
11305
                                   errors.ECODE_INVAL)
11306
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11307
                          msg="cannot change disk template")
11308
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11309
        if self.op.remote_node == pnode:
11310
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11311
                                     " as the primary node of the instance" %
11312
                                     self.op.remote_node, errors.ECODE_STATE)
11313
        _CheckNodeOnline(self, self.op.remote_node)
11314
        _CheckNodeNotDrained(self, self.op.remote_node)
11315
        # FIXME: here we assume that the old instance type is DT_PLAIN
11316
        assert instance.disk_template == constants.DT_PLAIN
11317
        disks = [{constants.IDISK_SIZE: d.size,
11318
                  constants.IDISK_VG: d.logical_id[0]}
11319
                 for d in instance.disks]
11320
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11321
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11322

    
11323
    # hvparams processing
11324
    if self.op.hvparams:
11325
      hv_type = instance.hypervisor
11326
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11327
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11328
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11329

    
11330
      # local check
11331
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11332
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11333
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11334
      self.hv_inst = i_hvdict # the new dict (without defaults)
11335
    else:
11336
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11337
                                              instance.hvparams)
11338
      self.hv_new = self.hv_inst = {}
11339

    
11340
    # beparams processing
11341
    if self.op.beparams:
11342
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11343
                                   use_none=True)
11344
      objects.UpgradeBeParams(i_bedict)
11345
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11346
      be_new = cluster.SimpleFillBE(i_bedict)
11347
      self.be_proposed = self.be_new = be_new # the new actual values
11348
      self.be_inst = i_bedict # the new dict (without defaults)
11349
    else:
11350
      self.be_new = self.be_inst = {}
11351
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11352
    be_old = cluster.FillBE(instance)
11353

    
11354
    # CPU param validation -- checking every time a parameter is
11355
    # changed to cover all cases where either CPU mask or vcpus have
11356
    # changed
11357
    if (constants.BE_VCPUS in self.be_proposed and
11358
        constants.HV_CPU_MASK in self.hv_proposed):
11359
      cpu_list = \
11360
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11361
      # Verify mask is consistent with number of vCPUs. Can skip this
11362
      # test if only 1 entry in the CPU mask, which means same mask
11363
      # is applied to all vCPUs.
11364
      if (len(cpu_list) > 1 and
11365
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11366
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11367
                                   " CPU mask [%s]" %
11368
                                   (self.be_proposed[constants.BE_VCPUS],
11369
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11370
                                   errors.ECODE_INVAL)
11371

    
11372
      # Only perform this test if a new CPU mask is given
11373
      if constants.HV_CPU_MASK in self.hv_new:
11374
        # Calculate the largest CPU number requested
11375
        max_requested_cpu = max(map(max, cpu_list))
11376
        # Check that all of the instance's nodes have enough physical CPUs to
11377
        # satisfy the requested CPU mask
11378
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11379
                                max_requested_cpu + 1, instance.hypervisor)
11380

    
11381
    # osparams processing
11382
    if self.op.osparams:
11383
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11384
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11385
      self.os_inst = i_osdict # the new dict (without defaults)
11386
    else:
11387
      self.os_inst = {}
11388

    
11389
    self.warn = []
11390

    
11391
    #TODO(dynmem): do the appropriate check involving MINMEM
11392
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11393
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11394
      mem_check_list = [pnode]
11395
      if be_new[constants.BE_AUTO_BALANCE]:
11396
        # either we changed auto_balance to yes or it was from before
11397
        mem_check_list.extend(instance.secondary_nodes)
11398
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11399
                                                  instance.hypervisor)
11400
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11401
                                         [instance.hypervisor])
11402
      pninfo = nodeinfo[pnode]
11403
      msg = pninfo.fail_msg
11404
      if msg:
11405
        # Assume the primary node is unreachable and go ahead
11406
        self.warn.append("Can't get info from primary node %s: %s" %
11407
                         (pnode, msg))
11408
      else:
11409
        (_, _, (pnhvinfo, )) = pninfo.payload
11410
        if not isinstance(pnhvinfo.get("memory_free", None), int):
11411
          self.warn.append("Node data from primary node %s doesn't contain"
11412
                           " free memory information" % pnode)
11413
        elif instance_info.fail_msg:
11414
          self.warn.append("Can't get instance runtime information: %s" %
11415
                          instance_info.fail_msg)
11416
        else:
11417
          if instance_info.payload:
11418
            current_mem = int(instance_info.payload["memory"])
11419
          else:
11420
            # Assume instance not running
11421
            # (there is a slight race condition here, but it's not very
11422
            # probable, and we have no other way to check)
11423
            # TODO: Describe race condition
11424
            current_mem = 0
11425
          #TODO(dynmem): do the appropriate check involving MINMEM
11426
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11427
                      pnhvinfo["memory_free"])
11428
          if miss_mem > 0:
11429
            raise errors.OpPrereqError("This change will prevent the instance"
11430
                                       " from starting, due to %d MB of memory"
11431
                                       " missing on its primary node" %
11432
                                       miss_mem,
11433
                                       errors.ECODE_NORES)
11434

    
11435
      if be_new[constants.BE_AUTO_BALANCE]:
11436
        for node, nres in nodeinfo.items():
11437
          if node not in instance.secondary_nodes:
11438
            continue
11439
          nres.Raise("Can't get info from secondary node %s" % node,
11440
                     prereq=True, ecode=errors.ECODE_STATE)
11441
          (_, _, (nhvinfo, )) = nres.payload
11442
          if not isinstance(nhvinfo.get("memory_free", None), int):
11443
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11444
                                       " memory information" % node,
11445
                                       errors.ECODE_STATE)
11446
          #TODO(dynmem): do the appropriate check involving MINMEM
11447
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11448
            raise errors.OpPrereqError("This change will prevent the instance"
11449
                                       " from failover to its secondary node"
11450
                                       " %s, due to not enough memory" % node,
11451
                                       errors.ECODE_STATE)
11452

    
11453
    # NIC processing
11454
    self.nic_pnew = {}
11455
    self.nic_pinst = {}
11456
    for nic_op, nic_dict in self.op.nics:
11457
      if nic_op == constants.DDM_REMOVE:
11458
        if not instance.nics:
11459
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11460
                                     errors.ECODE_INVAL)
11461
        continue
11462
      if nic_op != constants.DDM_ADD:
11463
        # an existing nic
11464
        if not instance.nics:
11465
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11466
                                     " no NICs" % nic_op,
11467
                                     errors.ECODE_INVAL)
11468
        if nic_op < 0 or nic_op >= len(instance.nics):
11469
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11470
                                     " are 0 to %d" %
11471
                                     (nic_op, len(instance.nics) - 1),
11472
                                     errors.ECODE_INVAL)
11473
        old_nic_params = instance.nics[nic_op].nicparams
11474
        old_nic_ip = instance.nics[nic_op].ip
11475
      else:
11476
        old_nic_params = {}
11477
        old_nic_ip = None
11478

    
11479
      update_params_dict = dict([(key, nic_dict[key])
11480
                                 for key in constants.NICS_PARAMETERS
11481
                                 if key in nic_dict])
11482

    
11483
      if "bridge" in nic_dict:
11484
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11485

    
11486
      new_nic_params = _GetUpdatedParams(old_nic_params,
11487
                                         update_params_dict)
11488
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11489
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11490
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11491
      self.nic_pinst[nic_op] = new_nic_params
11492
      self.nic_pnew[nic_op] = new_filled_nic_params
11493
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11494

    
11495
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11496
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11497
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11498
        if msg:
11499
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11500
          if self.op.force:
11501
            self.warn.append(msg)
11502
          else:
11503
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11504
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11505
        if constants.INIC_IP in nic_dict:
11506
          nic_ip = nic_dict[constants.INIC_IP]
11507
        else:
11508
          nic_ip = old_nic_ip
11509
        if nic_ip is None:
11510
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11511
                                     " on a routed nic", errors.ECODE_INVAL)
11512
      if constants.INIC_MAC in nic_dict:
11513
        nic_mac = nic_dict[constants.INIC_MAC]
11514
        if nic_mac is None:
11515
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11516
                                     errors.ECODE_INVAL)
11517
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11518
          # otherwise generate the mac
11519
          nic_dict[constants.INIC_MAC] = \
11520
            self.cfg.GenerateMAC(self.proc.GetECId())
11521
        else:
11522
          # or validate/reserve the current one
11523
          try:
11524
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11525
          except errors.ReservationError:
11526
            raise errors.OpPrereqError("MAC address %s already in use"
11527
                                       " in cluster" % nic_mac,
11528
                                       errors.ECODE_NOTUNIQUE)
11529

    
11530
    # DISK processing
11531
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11532
      raise errors.OpPrereqError("Disk operations not supported for"
11533
                                 " diskless instances",
11534
                                 errors.ECODE_INVAL)
11535
    for disk_op, _ in self.op.disks:
11536
      if disk_op == constants.DDM_REMOVE:
11537
        if len(instance.disks) == 1:
11538
          raise errors.OpPrereqError("Cannot remove the last disk of"
11539
                                     " an instance", errors.ECODE_INVAL)
11540
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
11541
                            msg="cannot remove disks")
11542

    
11543
      if (disk_op == constants.DDM_ADD and
11544
          len(instance.disks) >= constants.MAX_DISKS):
11545
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11546
                                   " add more" % constants.MAX_DISKS,
11547
                                   errors.ECODE_STATE)
11548
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11549
        # an existing disk
11550
        if disk_op < 0 or disk_op >= len(instance.disks):
11551
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11552
                                     " are 0 to %d" %
11553
                                     (disk_op, len(instance.disks)),
11554
                                     errors.ECODE_INVAL)
11555

    
11556
    # disabling the instance
11557
    if self.op.offline_inst:
11558
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11559
                          msg="cannot change instance state to offline")
11560

    
11561
    # enabling the instance
11562
    if self.op.online_inst:
11563
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11564
                          msg="cannot make instance go online")
11565

    
11566
  def _ConvertPlainToDrbd(self, feedback_fn):
11567
    """Converts an instance from plain to drbd.
11568

11569
    """
11570
    feedback_fn("Converting template to drbd")
11571
    instance = self.instance
11572
    pnode = instance.primary_node
11573
    snode = self.op.remote_node
11574

    
11575
    assert instance.disk_template == constants.DT_PLAIN
11576

    
11577
    # create a fake disk info for _GenerateDiskTemplate
11578
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11579
                  constants.IDISK_VG: d.logical_id[0]}
11580
                 for d in instance.disks]
11581
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11582
                                      instance.name, pnode, [snode],
11583
                                      disk_info, None, None, 0, feedback_fn)
11584
    info = _GetInstanceInfoText(instance)
11585
    feedback_fn("Creating aditional volumes...")
11586
    # first, create the missing data and meta devices
11587
    for disk in new_disks:
11588
      # unfortunately this is... not too nice
11589
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11590
                            info, True)
11591
      for child in disk.children:
11592
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11593
    # at this stage, all new LVs have been created, we can rename the
11594
    # old ones
11595
    feedback_fn("Renaming original volumes...")
11596
    rename_list = [(o, n.children[0].logical_id)
11597
                   for (o, n) in zip(instance.disks, new_disks)]
11598
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11599
    result.Raise("Failed to rename original LVs")
11600

    
11601
    feedback_fn("Initializing DRBD devices...")
11602
    # all child devices are in place, we can now create the DRBD devices
11603
    for disk in new_disks:
11604
      for node in [pnode, snode]:
11605
        f_create = node == pnode
11606
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11607

    
11608
    # at this point, the instance has been modified
11609
    instance.disk_template = constants.DT_DRBD8
11610
    instance.disks = new_disks
11611
    self.cfg.Update(instance, feedback_fn)
11612

    
11613
    # Release node locks while waiting for sync
11614
    _ReleaseLocks(self, locking.LEVEL_NODE)
11615

    
11616
    # disks are created, waiting for sync
11617
    disk_abort = not _WaitForSync(self, instance,
11618
                                  oneshot=not self.op.wait_for_sync)
11619
    if disk_abort:
11620
      raise errors.OpExecError("There are some degraded disks for"
11621
                               " this instance, please cleanup manually")
11622

    
11623
    # Node resource locks will be released by caller
11624

    
11625
  def _ConvertDrbdToPlain(self, feedback_fn):
11626
    """Converts an instance from drbd to plain.
11627

11628
    """
11629
    instance = self.instance
11630

    
11631
    assert len(instance.secondary_nodes) == 1
11632
    assert instance.disk_template == constants.DT_DRBD8
11633

    
11634
    pnode = instance.primary_node
11635
    snode = instance.secondary_nodes[0]
11636
    feedback_fn("Converting template to plain")
11637

    
11638
    old_disks = instance.disks
11639
    new_disks = [d.children[0] for d in old_disks]
11640

    
11641
    # copy over size and mode
11642
    for parent, child in zip(old_disks, new_disks):
11643
      child.size = parent.size
11644
      child.mode = parent.mode
11645

    
11646
    # update instance structure
11647
    instance.disks = new_disks
11648
    instance.disk_template = constants.DT_PLAIN
11649
    self.cfg.Update(instance, feedback_fn)
11650

    
11651
    # Release locks in case removing disks takes a while
11652
    _ReleaseLocks(self, locking.LEVEL_NODE)
11653

    
11654
    feedback_fn("Removing volumes on the secondary node...")
11655
    for disk in old_disks:
11656
      self.cfg.SetDiskID(disk, snode)
11657
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11658
      if msg:
11659
        self.LogWarning("Could not remove block device %s on node %s,"
11660
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11661

    
11662
    feedback_fn("Removing unneeded volumes on the primary node...")
11663
    for idx, disk in enumerate(old_disks):
11664
      meta = disk.children[1]
11665
      self.cfg.SetDiskID(meta, pnode)
11666
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11667
      if msg:
11668
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11669
                        " continuing anyway: %s", idx, pnode, msg)
11670

    
11671
    # this is a DRBD disk, return its port to the pool
11672
    for disk in old_disks:
11673
      tcp_port = disk.logical_id[2]
11674
      self.cfg.AddTcpUdpPort(tcp_port)
11675

    
11676
    # Node resource locks will be released by caller
11677

    
11678
  def Exec(self, feedback_fn):
11679
    """Modifies an instance.
11680

11681
    All parameters take effect only at the next restart of the instance.
11682

11683
    """
11684
    # Process here the warnings from CheckPrereq, as we don't have a
11685
    # feedback_fn there.
11686
    for warn in self.warn:
11687
      feedback_fn("WARNING: %s" % warn)
11688

    
11689
    assert ((self.op.disk_template is None) ^
11690
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11691
      "Not owning any node resource locks"
11692

    
11693
    result = []
11694
    instance = self.instance
11695
    # disk changes
11696
    for disk_op, disk_dict in self.op.disks:
11697
      if disk_op == constants.DDM_REMOVE:
11698
        # remove the last disk
11699
        device = instance.disks.pop()
11700
        device_idx = len(instance.disks)
11701
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11702
          self.cfg.SetDiskID(disk, node)
11703
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11704
          if msg:
11705
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11706
                            " continuing anyway", device_idx, node, msg)
11707
        result.append(("disk/%d" % device_idx, "remove"))
11708

    
11709
        # if this is a DRBD disk, return its port to the pool
11710
        if device.dev_type in constants.LDS_DRBD:
11711
          tcp_port = device.logical_id[2]
11712
          self.cfg.AddTcpUdpPort(tcp_port)
11713
      elif disk_op == constants.DDM_ADD:
11714
        # add a new disk
11715
        if instance.disk_template in (constants.DT_FILE,
11716
                                        constants.DT_SHARED_FILE):
11717
          file_driver, file_path = instance.disks[0].logical_id
11718
          file_path = os.path.dirname(file_path)
11719
        else:
11720
          file_driver = file_path = None
11721
        disk_idx_base = len(instance.disks)
11722
        new_disk = _GenerateDiskTemplate(self,
11723
                                         instance.disk_template,
11724
                                         instance.name, instance.primary_node,
11725
                                         instance.secondary_nodes,
11726
                                         [disk_dict],
11727
                                         file_path,
11728
                                         file_driver,
11729
                                         disk_idx_base, feedback_fn)[0]
11730
        instance.disks.append(new_disk)
11731
        info = _GetInstanceInfoText(instance)
11732

    
11733
        logging.info("Creating volume %s for instance %s",
11734
                     new_disk.iv_name, instance.name)
11735
        # Note: this needs to be kept in sync with _CreateDisks
11736
        #HARDCODE
11737
        for node in instance.all_nodes:
11738
          f_create = node == instance.primary_node
11739
          try:
11740
            _CreateBlockDev(self, node, instance, new_disk,
11741
                            f_create, info, f_create)
11742
          except errors.OpExecError, err:
11743
            self.LogWarning("Failed to create volume %s (%s) on"
11744
                            " node %s: %s",
11745
                            new_disk.iv_name, new_disk, node, err)
11746
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11747
                       (new_disk.size, new_disk.mode)))
11748
      else:
11749
        # change a given disk
11750
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11751
        result.append(("disk.mode/%d" % disk_op,
11752
                       disk_dict[constants.IDISK_MODE]))
11753

    
11754
    if self.op.disk_template:
11755
      if __debug__:
11756
        check_nodes = set(instance.all_nodes)
11757
        if self.op.remote_node:
11758
          check_nodes.add(self.op.remote_node)
11759
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11760
          owned = self.owned_locks(level)
11761
          assert not (check_nodes - owned), \
11762
            ("Not owning the correct locks, owning %r, expected at least %r" %
11763
             (owned, check_nodes))
11764

    
11765
      r_shut = _ShutdownInstanceDisks(self, instance)
11766
      if not r_shut:
11767
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11768
                                 " proceed with disk template conversion")
11769
      mode = (instance.disk_template, self.op.disk_template)
11770
      try:
11771
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11772
      except:
11773
        self.cfg.ReleaseDRBDMinors(instance.name)
11774
        raise
11775
      result.append(("disk_template", self.op.disk_template))
11776

    
11777
      assert instance.disk_template == self.op.disk_template, \
11778
        ("Expected disk template '%s', found '%s'" %
11779
         (self.op.disk_template, instance.disk_template))
11780

    
11781
    # Release node and resource locks if there are any (they might already have
11782
    # been released during disk conversion)
11783
    _ReleaseLocks(self, locking.LEVEL_NODE)
11784
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11785

    
11786
    # NIC changes
11787
    for nic_op, nic_dict in self.op.nics:
11788
      if nic_op == constants.DDM_REMOVE:
11789
        # remove the last nic
11790
        del instance.nics[-1]
11791
        result.append(("nic.%d" % len(instance.nics), "remove"))
11792
      elif nic_op == constants.DDM_ADD:
11793
        # mac and bridge should be set, by now
11794
        mac = nic_dict[constants.INIC_MAC]
11795
        ip = nic_dict.get(constants.INIC_IP, None)
11796
        nicparams = self.nic_pinst[constants.DDM_ADD]
11797
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11798
        instance.nics.append(new_nic)
11799
        result.append(("nic.%d" % (len(instance.nics) - 1),
11800
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11801
                       (new_nic.mac, new_nic.ip,
11802
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11803
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11804
                       )))
11805
      else:
11806
        for key in (constants.INIC_MAC, constants.INIC_IP):
11807
          if key in nic_dict:
11808
            setattr(instance.nics[nic_op], key, nic_dict[key])
11809
        if nic_op in self.nic_pinst:
11810
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11811
        for key, val in nic_dict.iteritems():
11812
          result.append(("nic.%s/%d" % (key, nic_op), val))
11813

    
11814
    # hvparams changes
11815
    if self.op.hvparams:
11816
      instance.hvparams = self.hv_inst
11817
      for key, val in self.op.hvparams.iteritems():
11818
        result.append(("hv/%s" % key, val))
11819

    
11820
    # beparams changes
11821
    if self.op.beparams:
11822
      instance.beparams = self.be_inst
11823
      for key, val in self.op.beparams.iteritems():
11824
        result.append(("be/%s" % key, val))
11825

    
11826
    # OS change
11827
    if self.op.os_name:
11828
      instance.os = self.op.os_name
11829

    
11830
    # osparams changes
11831
    if self.op.osparams:
11832
      instance.osparams = self.os_inst
11833
      for key, val in self.op.osparams.iteritems():
11834
        result.append(("os/%s" % key, val))
11835

    
11836
    # online/offline instance
11837
    if self.op.online_inst:
11838
      self.cfg.MarkInstanceDown(instance.name)
11839
      result.append(("admin_state", constants.ADMINST_DOWN))
11840
    if self.op.offline_inst:
11841
      self.cfg.MarkInstanceOffline(instance.name)
11842
      result.append(("admin_state", constants.ADMINST_OFFLINE))
11843

    
11844
    self.cfg.Update(instance, feedback_fn)
11845

    
11846
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11847
                self.owned_locks(locking.LEVEL_NODE)), \
11848
      "All node locks should have been released by now"
11849

    
11850
    return result
11851

    
11852
  _DISK_CONVERSIONS = {
11853
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11854
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11855
    }
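
  # Example (illustrative sketch): _DISK_CONVERSIONS is a plain dispatch table
  # keyed by (old_template, new_template). Its values are stored as ordinary
  # functions rather than bound methods, which is why Exec above invokes them
  # with an explicit "self":
  #
  #   mode = (constants.DT_PLAIN, constants.DT_DRBD8)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)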
11856

    
11857

    
11858
class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


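# Example (illustrative sketch; the opcode name is an assumption): a group
# change would typically be requested through the matching opcode, assumed
# here to be opcodes.OpInstanceChangeGroup; the LU above asks the iallocator
# for a plan and returns the resulting jobs:
#
#   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
#                                      target_groups=["other-group"],
#                                      early_release=False)
#
# target_groups may be omitted entirely, in which case the LU falls back to
# all groups except those already used by the instance.
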
class LUBackupQuery(NoHooksLU):
12000
  """Query the exports list
12001

12002
  """
12003
  REQ_BGL = False
12004

    
12005
  def ExpandNames(self):
12006
    self.needed_locks = {}
12007
    self.share_locks[locking.LEVEL_NODE] = 1
12008
    if not self.op.nodes:
12009
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12010
    else:
12011
      self.needed_locks[locking.LEVEL_NODE] = \
12012
        _GetWantedNodes(self, self.op.nodes)
12013

    
12014
  def Exec(self, feedback_fn):
12015
    """Compute the list of all the exported system images.
12016

12017
    @rtype: dict
12018
    @return: a dictionary with the structure node->(export-list)
12019
        where export-list is a list of the instances exported on
12020
        that node.
12021

12022
    """
12023
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
12024
    rpcresult = self.rpc.call_export_list(self.nodes)
12025
    result = {}
12026
    for node in rpcresult:
12027
      if rpcresult[node].fail_msg:
12028
        result[node] = False
12029
      else:
12030
        result[node] = rpcresult[node].payload
12031

    
12032
    return result
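
# LUBackupQuery.Exec maps every queried node either to False (the RPC failed)
# or to the list of instance names exported on that node. The helper below is
# an illustrative sketch of how a caller could invert that mapping to find the
# nodes holding a given export; the name and sample data are made up.
def _ExampleFindExportNodes(export_map, instance_name):
  """Returns the nodes on which C{instance_name} has an export.

  @param export_map: dict of node -> (False or list of instance names), in the
      shape returned by LUBackupQuery.Exec

  """
  return [node for (node, exports) in export_map.items()
          if exports is not False and instance_name in exports]

# For example (hypothetical data):
#   _ExampleFindExportNodes({"node1": ["inst1"], "node2": False}, "inst1")
#   returns ["node1"].
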

    
12034

    
12035
class LUBackupPrepare(NoHooksLU):
12036
  """Prepares an instance for an export and returns useful information.
12037

12038
  """
12039
  REQ_BGL = False
12040

    
12041
  def ExpandNames(self):
12042
    self._ExpandAndLockInstance()
12043

    
12044
  def CheckPrereq(self):
12045
    """Check prerequisites.
12046

12047
    """
12048
    instance_name = self.op.instance_name
12049

    
12050
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12051
    assert self.instance is not None, \
12052
          "Cannot retrieve locked instance %s" % self.op.instance_name
12053
    _CheckNodeOnline(self, self.instance.primary_node)
12054

    
12055
    self._cds = _GetClusterDomainSecret()
12056

    
12057
  def Exec(self, feedback_fn):
12058
    """Prepares an instance for an export.
12059

12060
    """
12061
    instance = self.instance
12062

    
12063
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12064
      salt = utils.GenerateSecret(8)
12065

    
12066
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12067
      result = self.rpc.call_x509_cert_create(instance.primary_node,
12068
                                              constants.RIE_CERT_VALIDITY)
12069
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
12070

    
12071
      (name, cert_pem) = result.payload
12072

    
12073
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12074
                                             cert_pem)
12075

    
12076
      return {
12077
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12078
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12079
                          salt),
12080
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12081
        }
12082

    
12083
    return None
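
# The remote-export preparation above binds the X509 key name to the cluster
# domain secret with a salted SHA-1 HMAC (utils.Sha1Hmac), which the export LU
# later checks with utils.VerifySha1Hmac. The sketch below shows the general
# idea using only the standard library; the exact way the salt and message are
# combined inside utils may differ, so treat this as an illustration only.
import hashlib
import hmac

def _ExampleSignKeyName(secret, key_name, salt):
  """Computes an illustrative salted HMAC-SHA1 over an X509 key name."""
  return hmac.new(secret, salt + key_name, hashlib.sha1).hexdigest()

def _ExampleVerifyKeyName(secret, key_name, digest, salt):
  """Recomputes the illustrative HMAC and compares it with the received one."""
  return _ExampleSignKeyName(secret, key_name, salt) == digest
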

    
12085

    
12086
class LUBackupExport(LogicalUnit):
12087
  """Export an instance to an image in the cluster.
12088

12089
  """
12090
  HPATH = "instance-export"
12091
  HTYPE = constants.HTYPE_INSTANCE
12092
  REQ_BGL = False
12093

    
12094
  def CheckArguments(self):
12095
    """Check the arguments.
12096

12097
    """
12098
    self.x509_key_name = self.op.x509_key_name
12099
    self.dest_x509_ca_pem = self.op.destination_x509_ca
12100

    
12101
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12102
      if not self.x509_key_name:
12103
        raise errors.OpPrereqError("Missing X509 key name for encryption",
12104
                                   errors.ECODE_INVAL)
12105

    
12106
      if not self.dest_x509_ca_pem:
12107
        raise errors.OpPrereqError("Missing destination X509 CA",
12108
                                   errors.ECODE_INVAL)
12109

    
12110
  def ExpandNames(self):
12111
    self._ExpandAndLockInstance()
12112

    
12113
    # Lock all nodes for local exports
12114
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12115
      # FIXME: lock only instance primary and destination node
12116
      #
12117
      # Sad but true, for now we have to lock all nodes, as we don't know where
12118
      # the previous export might be, and in this LU we search for it and
12119
      # remove it from its current node. In the future we could fix this by:
12120
      #  - making a tasklet to search (share-lock all), then create the
12121
      #    new one, then one to remove, after
12122
      #  - removing the removal operation altogether
12123
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12124

    
12125
  def DeclareLocks(self, level):
12126
    """Last minute lock declaration."""
12127
    # All nodes are locked anyway, so nothing to do here.
12128

    
12129
  def BuildHooksEnv(self):
12130
    """Build hooks env.
12131

12132
    This will run on the master, primary node and target node.
12133

12134
    """
12135
    env = {
12136
      "EXPORT_MODE": self.op.mode,
12137
      "EXPORT_NODE": self.op.target_node,
12138
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12139
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12140
      # TODO: Generic function for boolean env variables
12141
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12142
      }
12143

    
12144
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12145

    
12146
    return env
12147

    
12148
  def BuildHooksNodes(self):
12149
    """Build hooks nodes.
12150

12151
    """
12152
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12153

    
12154
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12155
      nl.append(self.op.target_node)
12156

    
12157
    return (nl, nl)
12158

    
12159
  def CheckPrereq(self):
12160
    """Check prerequisites.
12161

12162
    This checks that the instance and node names are valid.
12163

12164
    """
12165
    instance_name = self.op.instance_name
12166

    
12167
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12168
    assert self.instance is not None, \
12169
          "Cannot retrieve locked instance %s" % self.op.instance_name
12170
    _CheckNodeOnline(self, self.instance.primary_node)
12171

    
12172
    if (self.op.remove_instance and
12173
        self.instance.admin_state == constants.ADMINST_UP and
12174
        not self.op.shutdown):
12175
      raise errors.OpPrereqError("Can not remove instance without shutting it"
12176
                                 " down before")
12177

    
12178
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12179
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12180
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12181
      assert self.dst_node is not None
12182

    
12183
      _CheckNodeOnline(self, self.dst_node.name)
12184
      _CheckNodeNotDrained(self, self.dst_node.name)
12185

    
12186
      self._cds = None
12187
      self.dest_disk_info = None
12188
      self.dest_x509_ca = None
12189

    
12190
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12191
      self.dst_node = None
12192

    
12193
      if len(self.op.target_node) != len(self.instance.disks):
12194
        raise errors.OpPrereqError(("Received destination information for %s"
12195
                                    " disks, but instance %s has %s disks") %
12196
                                   (len(self.op.target_node), instance_name,
12197
                                    len(self.instance.disks)),
12198
                                   errors.ECODE_INVAL)
12199

    
12200
      cds = _GetClusterDomainSecret()
12201

    
12202
      # Check X509 key name
12203
      try:
12204
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12205
      except (TypeError, ValueError), err:
12206
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12207

    
12208
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12209
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12210
                                   errors.ECODE_INVAL)
12211

    
12212
      # Load and verify CA
12213
      try:
12214
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12215
      except OpenSSL.crypto.Error, err:
12216
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12217
                                   (err, ), errors.ECODE_INVAL)
12218

    
12219
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12220
      if errcode is not None:
12221
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12222
                                   (msg, ), errors.ECODE_INVAL)
12223

    
12224
      self.dest_x509_ca = cert
12225

    
12226
      # Verify target information
12227
      disk_info = []
12228
      for idx, disk_data in enumerate(self.op.target_node):
12229
        try:
12230
          (host, port, magic) = \
12231
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12232
        except errors.GenericError, err:
12233
          raise errors.OpPrereqError("Target info for disk %s: %s" %
12234
                                     (idx, err), errors.ECODE_INVAL)
12235

    
12236
        disk_info.append((host, port, magic))
12237

    
12238
      assert len(disk_info) == len(self.op.target_node)
12239
      self.dest_disk_info = disk_info
12240

    
12241
    else:
12242
      raise errors.ProgrammerError("Unhandled export mode %r" %
12243
                                   self.op.mode)
12244

    
12245
    # instance disk type verification
12246
    # TODO: Implement export support for file-based disks
12247
    for disk in self.instance.disks:
12248
      if disk.dev_type == constants.LD_FILE:
12249
        raise errors.OpPrereqError("Export not supported for instances with"
12250
                                   " file-based disks", errors.ECODE_INVAL)
12251

    
12252
  def _CleanupExports(self, feedback_fn):
12253
    """Removes exports of current instance from all other nodes.
12254

12255
    If an instance in a cluster with nodes A..D was exported to node C, its
12256
    exports will be removed from the nodes A, B and D.
12257

12258
    """
12259
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12260

    
12261
    nodelist = self.cfg.GetNodeList()
12262
    nodelist.remove(self.dst_node.name)
12263

    
12264
    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpBackupQuery substitutes
    # an empty list with the full cluster node list.
12267
    iname = self.instance.name
12268
    if nodelist:
12269
      feedback_fn("Removing old exports for instance %s" % iname)
12270
      exportlist = self.rpc.call_export_list(nodelist)
12271
      for node in exportlist:
12272
        if exportlist[node].fail_msg:
12273
          continue
12274
        if iname in exportlist[node].payload:
12275
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12276
          if msg:
12277
            self.LogWarning("Could not remove older export for instance %s"
12278
                            " on node %s: %s", iname, node, msg)
12279

    
12280
  def Exec(self, feedback_fn):
12281
    """Export an instance to an image in the cluster.
12282

12283
    """
12284
    assert self.op.mode in constants.EXPORT_MODES
12285

    
12286
    instance = self.instance
12287
    src_node = instance.primary_node
12288

    
12289
    if self.op.shutdown:
12290
      # shutdown the instance, but not the disks
12291
      feedback_fn("Shutting down instance %s" % instance.name)
12292
      result = self.rpc.call_instance_shutdown(src_node, instance,
12293
                                               self.op.shutdown_timeout)
12294
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12295
      result.Raise("Could not shutdown instance %s on"
12296
                   " node %s" % (instance.name, src_node))
12297

    
12298
    # set the disks ID correctly since call_instance_start needs the
12299
    # correct drbd minor to create the symlinks
12300
    for disk in instance.disks:
12301
      self.cfg.SetDiskID(disk, src_node)
12302

    
12303
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
12304

    
12305
    if activate_disks:
12306
      # Activate the instance disks if we're exporting a stopped instance
12307
      feedback_fn("Activating disks for %s" % instance.name)
12308
      _StartInstanceDisks(self, instance, None)
12309

    
12310
    try:
12311
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12312
                                                     instance)
12313

    
12314
      helper.CreateSnapshots()
12315
      try:
12316
        if (self.op.shutdown and
12317
            instance.admin_state == constants.ADMINST_UP and
12318
            not self.op.remove_instance):
12319
          assert not activate_disks
12320
          feedback_fn("Starting instance %s" % instance.name)
12321
          result = self.rpc.call_instance_start(src_node,
12322
                                                (instance, None, None), False)
12323
          msg = result.fail_msg
12324
          if msg:
12325
            feedback_fn("Failed to start instance: %s" % msg)
12326
            _ShutdownInstanceDisks(self, instance)
12327
            raise errors.OpExecError("Could not start instance: %s" % msg)
12328

    
12329
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12330
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12331
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12332
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12333
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12334

    
12335
          (key_name, _, _) = self.x509_key_name
12336

    
12337
          dest_ca_pem = \
12338
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12339
                                            self.dest_x509_ca)
12340

    
12341
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12342
                                                     key_name, dest_ca_pem,
12343
                                                     timeouts)
12344
      finally:
12345
        helper.Cleanup()
12346

    
12347
      # Check for backwards compatibility
12348
      assert len(dresults) == len(instance.disks)
12349
      assert compat.all(isinstance(i, bool) for i in dresults), \
12350
             "Not all results are boolean: %r" % dresults
12351

    
12352
    finally:
12353
      if activate_disks:
12354
        feedback_fn("Deactivating disks for %s" % instance.name)
12355
        _ShutdownInstanceDisks(self, instance)
12356

    
12357
    if not (compat.all(dresults) and fin_resu):
12358
      failures = []
12359
      if not fin_resu:
12360
        failures.append("export finalization")
12361
      if not compat.all(dresults):
12362
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12363
                               if not dsk)
12364
        failures.append("disk export: disk(s) %s" % fdsk)
12365

    
12366
      raise errors.OpExecError("Export failed, errors in %s" %
12367
                               utils.CommaJoin(failures))
12368

    
12369
    # At this point, the export was successful, we can cleanup/finish
12370

    
12371
    # Remove instance if requested
12372
    if self.op.remove_instance:
12373
      feedback_fn("Removing instance %s" % instance.name)
12374
      _RemoveInstance(self, feedback_fn, instance,
12375
                      self.op.ignore_remove_failures)
12376

    
12377
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12378
      self._CleanupExports(feedback_fn)
12379

    
12380
    return fin_resu, dresults
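
# LUBackupExport.Exec combines one boolean for export finalization with one
# boolean per exported disk. The helper below restates that aggregation as an
# illustrative, self-contained sketch (the LU itself joins the messages with
# utils.CommaJoin and raises errors.OpExecError on failure).
def _ExampleSummarizeExport(fin_resu, dresults):
  """Returns human-readable failure descriptions for an export result.

  @param fin_resu: whether export finalization succeeded
  @param dresults: list of per-disk booleans, in disk index order

  """
  failures = []
  if not fin_resu:
    failures.append("export finalization")

  failed_disks = [str(idx) for (idx, ok) in enumerate(dresults) if not ok]
  if failed_disks:
    failures.append("disk export: disk(s) %s" % ", ".join(failed_disks))

  return failures

# For example (hypothetical data), _ExampleSummarizeExport(True, [True, False])
# returns ["disk export: disk(s) 1"].
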

    
12382

    
12383
class LUBackupRemove(NoHooksLU):
12384
  """Remove exports related to the named instance.
12385

12386
  """
12387
  REQ_BGL = False
12388

    
12389
  def ExpandNames(self):
12390
    self.needed_locks = {}
12391
    # We need all nodes to be locked in order for RemoveExport to work, but we
12392
    # don't need to lock the instance itself, as nothing will happen to it (and
12393
    # we can remove exports also for a removed instance)
12394
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12395

    
12396
  def Exec(self, feedback_fn):
12397
    """Remove any export.
12398

12399
    """
12400
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12401
    # If the instance was not found we'll try with the name that was passed in.
12402
    # This will only work if it was an FQDN, though.
12403
    fqdn_warn = False
12404
    if not instance_name:
12405
      fqdn_warn = True
12406
      instance_name = self.op.instance_name
12407

    
12408
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12409
    exportlist = self.rpc.call_export_list(locked_nodes)
12410
    found = False
12411
    for node in exportlist:
12412
      msg = exportlist[node].fail_msg
12413
      if msg:
12414
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12415
        continue
12416
      if instance_name in exportlist[node].payload:
12417
        found = True
12418
        result = self.rpc.call_export_remove(node, instance_name)
12419
        msg = result.fail_msg
12420
        if msg:
12421
          logging.error("Could not remove export for instance %s"
12422
                        " on node %s: %s", instance_name, node, msg)
12423

    
12424
    if fqdn_warn and not found:
12425
      feedback_fn("Export not found. If trying to remove an export belonging"
12426
                  " to a deleted instance please use its Fully Qualified"
12427
                  " Domain Name.")
12428

    
12429

    
12430
class LUGroupAdd(LogicalUnit):
12431
  """Logical unit for creating node groups.
12432

12433
  """
12434
  HPATH = "group-add"
12435
  HTYPE = constants.HTYPE_GROUP
12436
  REQ_BGL = False
12437

    
12438
  def ExpandNames(self):
12439
    # We need the new group's UUID here so that we can create and acquire the
12440
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12441
    # that it should not check whether the UUID exists in the configuration.
12442
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12443
    self.needed_locks = {}
12444
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12445

    
12446
  def CheckPrereq(self):
12447
    """Check prerequisites.
12448

12449
    This checks that the given group name is not an existing node group
12450
    already.
12451

12452
    """
12453
    try:
12454
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12455
    except errors.OpPrereqError:
12456
      pass
12457
    else:
12458
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12459
                                 " node group (UUID: %s)" %
12460
                                 (self.op.group_name, existing_uuid),
12461
                                 errors.ECODE_EXISTS)
12462

    
12463
    if self.op.ndparams:
12464
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12465

    
12466
  def BuildHooksEnv(self):
12467
    """Build hooks env.
12468

12469
    """
12470
    return {
12471
      "GROUP_NAME": self.op.group_name,
12472
      }
12473

    
12474
  def BuildHooksNodes(self):
12475
    """Build hooks nodes.
12476

12477
    """
12478
    mn = self.cfg.GetMasterNode()
12479
    return ([mn], [mn])
12480

    
12481
  def Exec(self, feedback_fn):
12482
    """Add the node group to the cluster.
12483

12484
    """
12485
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12486
                                  uuid=self.group_uuid,
12487
                                  alloc_policy=self.op.alloc_policy,
12488
                                  ndparams=self.op.ndparams)
12489

    
12490
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12491
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12492

    
12493

    
12494
class LUGroupAssignNodes(NoHooksLU):
12495
  """Logical unit for assigning nodes to groups.
12496

12497
  """
12498
  REQ_BGL = False
12499

    
12500
  def ExpandNames(self):
12501
    # These raise errors.OpPrereqError on their own:
12502
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12503
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12504

    
12505
    # We want to lock all the affected nodes and groups. We have readily
12506
    # available the list of nodes, and the *destination* group. To gather the
12507
    # list of "source" groups, we need to fetch node information later on.
12508
    self.needed_locks = {
12509
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12510
      locking.LEVEL_NODE: self.op.nodes,
12511
      }
12512

    
12513
  def DeclareLocks(self, level):
12514
    if level == locking.LEVEL_NODEGROUP:
12515
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12516

    
12517
      # Try to get all affected nodes' groups without having the group or node
12518
      # lock yet. Needs verification later in the code flow.
12519
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12520

    
12521
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12522

    
12523
  def CheckPrereq(self):
12524
    """Check prerequisites.
12525

12526
    """
12527
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12528
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12529
            frozenset(self.op.nodes))
12530

    
12531
    expected_locks = (set([self.group_uuid]) |
12532
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12533
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12534
    if actual_locks != expected_locks:
12535
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12536
                               " current groups are '%s', used to be '%s'" %
12537
                               (utils.CommaJoin(expected_locks),
12538
                                utils.CommaJoin(actual_locks)))
12539

    
12540
    self.node_data = self.cfg.GetAllNodesInfo()
12541
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12542
    instance_data = self.cfg.GetAllInstancesInfo()
12543

    
12544
    if self.group is None:
12545
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12546
                               (self.op.group_name, self.group_uuid))
12547

    
12548
    (new_splits, previous_splits) = \
12549
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12550
                                             for node in self.op.nodes],
12551
                                            self.node_data, instance_data)
12552

    
12553
    if new_splits:
12554
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12555

    
12556
      if not self.op.force:
12557
        raise errors.OpExecError("The following instances get split by this"
12558
                                 " change and --force was not given: %s" %
12559
                                 fmt_new_splits)
12560
      else:
12561
        self.LogWarning("This operation will split the following instances: %s",
12562
                        fmt_new_splits)
12563

    
12564
        if previous_splits:
12565
          self.LogWarning("In addition, these already-split instances continue"
12566
                          " to be split across groups: %s",
12567
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12568

    
12569
  def Exec(self, feedback_fn):
12570
    """Assign nodes to a new group.
12571

12572
    """
12573
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12574

    
12575
    self.cfg.AssignGroupNodes(mods)
12576

    
12577
  @staticmethod
12578
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12579
    """Check for split instances after a node assignment.
12580

12581
    This method considers a series of node assignments as an atomic operation,
12582
    and returns information about split instances after applying the set of
12583
    changes.
12584

12585
    In particular, it returns information about newly split instances, and
12586
    instances that were already split, and remain so after the change.
12587

12588
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12589
    considered.
12590

12591
    @type changes: list of (node_name, new_group_uuid) pairs.
12592
    @param changes: list of node assignments to consider.
12593
    @param node_data: a dict with data for all nodes
12594
    @param instance_data: a dict with all instances to consider
12595
    @rtype: a two-tuple
12596
    @return: a list of instances that were previously okay and become split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.
12599

12600
    """
12601
    changed_nodes = dict((node, group) for node, group in changes
12602
                         if node_data[node].group != group)
12603

    
12604
    all_split_instances = set()
12605
    previously_split_instances = set()
12606

    
12607
    def InstanceNodes(instance):
12608
      return [instance.primary_node] + list(instance.secondary_nodes)
12609

    
12610
    for inst in instance_data.values():
12611
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12612
        continue
12613

    
12614
      instance_nodes = InstanceNodes(inst)
12615

    
12616
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12617
        previously_split_instances.add(inst.name)
12618

    
12619
      if len(set(changed_nodes.get(node, node_data[node].group)
12620
                 for node in instance_nodes)) > 1:
12621
        all_split_instances.add(inst.name)
12622

    
12623
    return (list(all_split_instances - previously_split_instances),
12624
            list(previously_split_instances & all_split_instances))
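
# CheckAssignmentForSplitInstances only needs objects exposing a handful of
# attributes, so its behaviour can be demonstrated with lightweight stand-ins.
# The sketch below uses made-up names and assumes that DRBD
# (constants.DT_DRBD8) is one of the internally mirrored templates in
# constants.DTS_INT_MIRROR.
import collections

_FakeNode = collections.namedtuple("_FakeNode", ["group"])
_FakeInstance = collections.namedtuple(
  "_FakeInstance",
  ["name", "disk_template", "primary_node", "secondary_nodes"])

def _ExampleSplitCheck():
  """Runs the split-instance check on a two-node toy configuration."""
  node_data = {
    "node1": _FakeNode(group="group-A"),
    "node2": _FakeNode(group="group-A"),
    }
  instance_data = {
    "inst1": _FakeInstance(name="inst1", disk_template=constants.DT_DRBD8,
                           primary_node="node1", secondary_nodes=["node2"]),
    }
  # Moving node2 into another group would split inst1 across two groups, so
  # the expected result is (["inst1"], []): newly split, nothing split before.
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("node2", "group-B")], node_data, instance_data)
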

    
12626

    
12627
class _GroupQuery(_QueryBase):
12628
  FIELDS = query.GROUP_FIELDS
12629

    
12630
  def ExpandNames(self, lu):
12631
    lu.needed_locks = {}
12632

    
12633
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12634
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12635

    
12636
    if not self.names:
12637
      self.wanted = [name_to_uuid[name]
12638
                     for name in utils.NiceSort(name_to_uuid.keys())]
12639
    else:
12640
      # Accept names to be either names or UUIDs.
12641
      missing = []
12642
      self.wanted = []
12643
      all_uuid = frozenset(self._all_groups.keys())
12644

    
12645
      for name in self.names:
12646
        if name in all_uuid:
12647
          self.wanted.append(name)
12648
        elif name in name_to_uuid:
12649
          self.wanted.append(name_to_uuid[name])
12650
        else:
12651
          missing.append(name)
12652

    
12653
      if missing:
12654
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12655
                                   utils.CommaJoin(missing),
12656
                                   errors.ECODE_NOENT)
12657

    
12658
  def DeclareLocks(self, lu, level):
12659
    pass
12660

    
12661
  def _GetQueryData(self, lu):
12662
    """Computes the list of node groups and their attributes.
12663

12664
    """
12665
    do_nodes = query.GQ_NODE in self.requested_data
12666
    do_instances = query.GQ_INST in self.requested_data
12667

    
12668
    group_to_nodes = None
12669
    group_to_instances = None
12670

    
12671
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12672
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12673
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12674
    # instance->node. Hence, we will need to process nodes even if we only need
12675
    # instance information.
12676
    if do_nodes or do_instances:
12677
      all_nodes = lu.cfg.GetAllNodesInfo()
12678
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12679
      node_to_group = {}
12680

    
12681
      for node in all_nodes.values():
12682
        if node.group in group_to_nodes:
12683
          group_to_nodes[node.group].append(node.name)
12684
          node_to_group[node.name] = node.group
12685

    
12686
      if do_instances:
12687
        all_instances = lu.cfg.GetAllInstancesInfo()
12688
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12689

    
12690
        for instance in all_instances.values():
12691
          node = instance.primary_node
12692
          if node in node_to_group:
12693
            group_to_instances[node_to_group[node]].append(instance.name)
12694

    
12695
        if not do_nodes:
12696
          # Do not pass on node information if it was not requested.
12697
          group_to_nodes = None
12698

    
12699
    return query.GroupQueryData([self._all_groups[uuid]
12700
                                 for uuid in self.wanted],
12701
                                group_to_nodes, group_to_instances)
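
# _GetQueryData builds its group-to-node and group-to-instance mappings by
# first recording each node's group and then following every instance's
# primary node. The same two-step idea, reduced to plain dictionaries and
# stripped of the "wanted groups" filtering, as a self-contained sketch with
# made-up names.
def _ExampleGroupMappings(node_to_group, instance_to_primary):
  """Builds group -> nodes and group -> instances dictionaries.

  @param node_to_group: dict of node name -> group UUID
  @param instance_to_primary: dict of instance name -> primary node name

  """
  group_to_nodes = {}
  for (node, group) in node_to_group.items():
    group_to_nodes.setdefault(group, []).append(node)

  group_to_instances = dict((group, []) for group in group_to_nodes)
  for (inst, pnode) in instance_to_primary.items():
    group = node_to_group.get(pnode)
    if group is not None:
      group_to_instances[group].append(inst)

  return (group_to_nodes, group_to_instances)
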

    
12703

    
12704
class LUGroupQuery(NoHooksLU):
12705
  """Logical unit for querying node groups.
12706

12707
  """
12708
  REQ_BGL = False
12709

    
12710
  def CheckArguments(self):
12711
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12712
                          self.op.output_fields, False)
12713

    
12714
  def ExpandNames(self):
12715
    self.gq.ExpandNames(self)
12716

    
12717
  def DeclareLocks(self, level):
12718
    self.gq.DeclareLocks(self, level)
12719

    
12720
  def Exec(self, feedback_fn):
12721
    return self.gq.OldStyleQuery(self)
12722

    
12723

    
12724
class LUGroupSetParams(LogicalUnit):
12725
  """Modifies the parameters of a node group.
12726

12727
  """
12728
  HPATH = "group-modify"
12729
  HTYPE = constants.HTYPE_GROUP
12730
  REQ_BGL = False
12731

    
12732
  def CheckArguments(self):
12733
    all_changes = [
12734
      self.op.ndparams,
12735
      self.op.alloc_policy,
12736
      ]
12737

    
12738
    if all_changes.count(None) == len(all_changes):
12739
      raise errors.OpPrereqError("Please pass at least one modification",
12740
                                 errors.ECODE_INVAL)
12741

    
12742
  def ExpandNames(self):
12743
    # This raises errors.OpPrereqError on its own:
12744
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12745

    
12746
    self.needed_locks = {
12747
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12748
      }
12749

    
12750
  def CheckPrereq(self):
12751
    """Check prerequisites.
12752

12753
    """
12754
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12755

    
12756
    if self.group is None:
12757
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12758
                               (self.op.group_name, self.group_uuid))
12759

    
12760
    if self.op.ndparams:
12761
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12762
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12763
      self.new_ndparams = new_ndparams
12764

    
12765
  def BuildHooksEnv(self):
12766
    """Build hooks env.
12767

12768
    """
12769
    return {
12770
      "GROUP_NAME": self.op.group_name,
12771
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12772
      }
12773

    
12774
  def BuildHooksNodes(self):
12775
    """Build hooks nodes.
12776

12777
    """
12778
    mn = self.cfg.GetMasterNode()
12779
    return ([mn], [mn])
12780

    
12781
  def Exec(self, feedback_fn):
12782
    """Modifies the node group.
12783

12784
    """
12785
    result = []
12786

    
12787
    if self.op.ndparams:
12788
      self.group.ndparams = self.new_ndparams
12789
      result.append(("ndparams", str(self.group.ndparams)))
12790

    
12791
    if self.op.alloc_policy:
12792
      self.group.alloc_policy = self.op.alloc_policy
12793

    
12794
    self.cfg.Update(self.group, feedback_fn)
12795
    return result
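
# LUGroupSetParams merges the requested ndparams into the group's current
# dictionary via _GetUpdatedParams and type-checks the override with
# utils.ForceDictType. The sketch below shows only the plain merge step and is
# deliberately simplified: the real helper also supports resetting individual
# keys back to their defaults, which is left out here.
def _ExampleMergeParams(current, overrides):
  """Returns a new dict with C{overrides} applied on top of C{current}."""
  merged = dict(current)
  merged.update(overrides)
  return merged

# For example (hypothetical data):
#   _ExampleMergeParams({"param_a": 1}, {"param_b": 2})
#   returns {"param_a": 1, "param_b": 2}.
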

    
12797

    
12798
class LUGroupRemove(LogicalUnit):
12799
  HPATH = "group-remove"
12800
  HTYPE = constants.HTYPE_GROUP
12801
  REQ_BGL = False
12802

    
12803
  def ExpandNames(self):
12804
    # This raises errors.OpPrereqError on its own:
12805
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12806
    self.needed_locks = {
12807
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12808
      }
12809

    
12810
  def CheckPrereq(self):
12811
    """Check prerequisites.
12812

12813
    This checks that the given group name exists as a node group, that is
12814
    empty (i.e., contains no nodes), and that is not the last group of the
12815
    cluster.
12816

12817
    """
12818
    # Verify that the group is empty.
12819
    group_nodes = [node.name
12820
                   for node in self.cfg.GetAllNodesInfo().values()
12821
                   if node.group == self.group_uuid]
12822

    
12823
    if group_nodes:
12824
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12825
                                 " nodes: %s" %
12826
                                 (self.op.group_name,
12827
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12828
                                 errors.ECODE_STATE)
12829

    
12830
    # Verify the cluster would not be left group-less.
12831
    if len(self.cfg.GetNodeGroupList()) == 1:
12832
      raise errors.OpPrereqError("Group '%s' is the only group,"
12833
                                 " cannot be removed" %
12834
                                 self.op.group_name,
12835
                                 errors.ECODE_STATE)
12836

    
12837
  def BuildHooksEnv(self):
12838
    """Build hooks env.
12839

12840
    """
12841
    return {
12842
      "GROUP_NAME": self.op.group_name,
12843
      }
12844

    
12845
  def BuildHooksNodes(self):
12846
    """Build hooks nodes.
12847

12848
    """
12849
    mn = self.cfg.GetMasterNode()
12850
    return ([mn], [mn])
12851

    
12852
  def Exec(self, feedback_fn):
12853
    """Remove the node group.
12854

12855
    """
12856
    try:
12857
      self.cfg.RemoveNodeGroup(self.group_uuid)
12858
    except errors.ConfigurationError:
12859
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12860
                               (self.op.group_name, self.group_uuid))
12861

    
12862
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12863

    
12864

    
12865
class LUGroupRename(LogicalUnit):
12866
  HPATH = "group-rename"
12867
  HTYPE = constants.HTYPE_GROUP
12868
  REQ_BGL = False
12869

    
12870
  def ExpandNames(self):
12871
    # This raises errors.OpPrereqError on its own:
12872
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12873

    
12874
    self.needed_locks = {
12875
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12876
      }
12877

    
12878
  def CheckPrereq(self):
12879
    """Check prerequisites.
12880

12881
    Ensures requested new name is not yet used.
12882

12883
    """
12884
    try:
12885
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12886
    except errors.OpPrereqError:
12887
      pass
12888
    else:
12889
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12890
                                 " node group (UUID: %s)" %
12891
                                 (self.op.new_name, new_name_uuid),
12892
                                 errors.ECODE_EXISTS)
12893

    
12894
  def BuildHooksEnv(self):
12895
    """Build hooks env.
12896

12897
    """
12898
    return {
12899
      "OLD_NAME": self.op.group_name,
12900
      "NEW_NAME": self.op.new_name,
12901
      }
12902

    
12903
  def BuildHooksNodes(self):
12904
    """Build hooks nodes.
12905

12906
    """
12907
    mn = self.cfg.GetMasterNode()
12908

    
12909
    all_nodes = self.cfg.GetAllNodesInfo()
12910
    all_nodes.pop(mn, None)
12911

    
12912
    run_nodes = [mn]
12913
    run_nodes.extend(node.name for node in all_nodes.values()
12914
                     if node.group == self.group_uuid)
12915

    
12916
    return (run_nodes, run_nodes)
12917

    
12918
  def Exec(self, feedback_fn):
12919
    """Rename the node group.
12920

12921
    """
12922
    group = self.cfg.GetNodeGroup(self.group_uuid)
12923

    
12924
    if group is None:
12925
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12926
                               (self.op.group_name, self.group_uuid))
12927

    
12928
    group.name = self.op.new_name
12929
    self.cfg.Update(group, feedback_fn)
12930

    
12931
    return self.op.new_name
12932

    
12933

    
12934
class LUGroupEvacuate(LogicalUnit):
12935
  HPATH = "group-evacuate"
12936
  HTYPE = constants.HTYPE_GROUP
12937
  REQ_BGL = False
12938

    
12939
  def ExpandNames(self):
12940
    # This raises errors.OpPrereqError on its own:
12941
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12942

    
12943
    if self.op.target_groups:
12944
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12945
                                  self.op.target_groups)
12946
    else:
12947
      self.req_target_uuids = []
12948

    
12949
    if self.group_uuid in self.req_target_uuids:
12950
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12951
                                 " as a target group (targets are %s)" %
12952
                                 (self.group_uuid,
12953
                                  utils.CommaJoin(self.req_target_uuids)),
12954
                                 errors.ECODE_INVAL)
12955

    
12956
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12957

    
12958
    self.share_locks = _ShareAll()
12959
    self.needed_locks = {
12960
      locking.LEVEL_INSTANCE: [],
12961
      locking.LEVEL_NODEGROUP: [],
12962
      locking.LEVEL_NODE: [],
12963
      }
12964

    
12965
  def DeclareLocks(self, level):
12966
    if level == locking.LEVEL_INSTANCE:
12967
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12968

    
12969
      # Lock instances optimistically, needs verification once node and group
12970
      # locks have been acquired
12971
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12972
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12973

    
12974
    elif level == locking.LEVEL_NODEGROUP:
12975
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12976

    
12977
      if self.req_target_uuids:
12978
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12979

    
12980
        # Lock all groups used by instances optimistically; this requires going
12981
        # via the node before it's locked, requiring verification later on
12982
        lock_groups.update(group_uuid
12983
                           for instance_name in
12984
                             self.owned_locks(locking.LEVEL_INSTANCE)
12985
                           for group_uuid in
12986
                             self.cfg.GetInstanceNodeGroups(instance_name))
12987
      else:
12988
        # No target groups, need to lock all of them
12989
        lock_groups = locking.ALL_SET
12990

    
12991
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12992

    
12993
    elif level == locking.LEVEL_NODE:
12994
      # This will only lock the nodes in the group to be evacuated which
12995
      # contain actual instances
12996
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12997
      self._LockInstancesNodes()
12998

    
12999
      # Lock all nodes in group to be evacuated and target groups
13000
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13001
      assert self.group_uuid in owned_groups
13002
      member_nodes = [node_name
13003
                      for group in owned_groups
13004
                      for node_name in self.cfg.GetNodeGroup(group).members]
13005
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13006

    
13007
  def CheckPrereq(self):
13008
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13009
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13010
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13011

    
13012
    assert owned_groups.issuperset(self.req_target_uuids)
13013
    assert self.group_uuid in owned_groups
13014

    
13015
    # Check if locked instances are still correct
13016
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13017

    
13018
    # Get instance information
13019
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13020

    
13021
    # Check if node groups for locked instances are still correct
13022
    for instance_name in owned_instances:
13023
      inst = self.instances[instance_name]
13024
      assert owned_nodes.issuperset(inst.all_nodes), \
13025
        "Instance %s's nodes changed while we kept the lock" % instance_name
13026

    
13027
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13028
                                             owned_groups)
13029

    
13030
      assert self.group_uuid in inst_groups, \
13031
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13032

    
13033
    if self.req_target_uuids:
13034
      # User requested specific target groups
13035
      self.target_uuids = self.req_target_uuids
13036
    else:
13037
      # All groups except the one to be evacuated are potential targets
13038
      self.target_uuids = [group_uuid for group_uuid in owned_groups
13039
                           if group_uuid != self.group_uuid]
13040

    
13041
      if not self.target_uuids:
13042
        raise errors.OpPrereqError("There are no possible target groups",
13043
                                   errors.ECODE_INVAL)
13044

    
13045
  def BuildHooksEnv(self):
13046
    """Build hooks env.
13047

13048
    """
13049
    return {
13050
      "GROUP_NAME": self.op.group_name,
13051
      "TARGET_GROUPS": " ".join(self.target_uuids),
13052
      }
13053

    
13054
  def BuildHooksNodes(self):
13055
    """Build hooks nodes.
13056

13057
    """
13058
    mn = self.cfg.GetMasterNode()
13059

    
13060
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13061

    
13062
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13063

    
13064
    return (run_nodes, run_nodes)
13065

    
13066
  def Exec(self, feedback_fn):
13067
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13068

    
13069
    assert self.group_uuid not in self.target_uuids
13070

    
13071
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13072
                     instances=instances, target_groups=self.target_uuids)
13073

    
13074
    ial.Run(self.op.iallocator)
13075

    
13076
    if not ial.success:
13077
      raise errors.OpPrereqError("Can't compute group evacuation using"
13078
                                 " iallocator '%s': %s" %
13079
                                 (self.op.iallocator, ial.info),
13080
                                 errors.ECODE_NORES)
13081

    
13082
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13083

    
13084
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13085
                 len(jobs), self.op.group_name)
13086

    
13087
    return ResultWithJobs(jobs)
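
# LUGroupEvacuate acquires its instance and group locks optimistically in
# DeclareLocks and only verifies in CheckPrereq, once the locks are held, that
# the configuration still matches what was locked. The essence of that
# "lock optimistically, verify afterwards" pattern is sketched below with
# plain sets; the function name and data are illustrative only.
def _ExampleVerifyOptimisticLocks(locked_groups, current_groups):
  """Checks that groups computed before locking are still sufficient.

  @param locked_groups: group UUIDs for which locks were acquired
  @param current_groups: group UUIDs the instances actually use now

  """
  missing = set(current_groups) - set(locked_groups)
  if missing:
    # The LU treats a mismatch like this as a prerequisite failure
    raise ValueError("Instances changed groups since locks were acquired,"
                     " missing locks for %s" % ", ".join(sorted(missing)))
  return True
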

    
13089

    
13090
class TagsLU(NoHooksLU): # pylint: disable=W0223
13091
  """Generic tags LU.
13092

13093
  This is an abstract class which is the parent of all the other tags LUs.
13094

13095
  """
13096
  def ExpandNames(self):
13097
    self.group_uuid = None
13098
    self.needed_locks = {}
13099
    if self.op.kind == constants.TAG_NODE:
13100
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13101
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
13102
    elif self.op.kind == constants.TAG_INSTANCE:
13103
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13104
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13105
    elif self.op.kind == constants.TAG_NODEGROUP:
13106
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13107

    
13108
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13109
    # not possible to acquire the BGL based on opcode parameters)
13110

    
13111
  def CheckPrereq(self):
13112
    """Check prerequisites.
13113

13114
    """
13115
    if self.op.kind == constants.TAG_CLUSTER:
13116
      self.target = self.cfg.GetClusterInfo()
13117
    elif self.op.kind == constants.TAG_NODE:
13118
      self.target = self.cfg.GetNodeInfo(self.op.name)
13119
    elif self.op.kind == constants.TAG_INSTANCE:
13120
      self.target = self.cfg.GetInstanceInfo(self.op.name)
13121
    elif self.op.kind == constants.TAG_NODEGROUP:
13122
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
13123
    else:
13124
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13125
                                 str(self.op.kind), errors.ECODE_INVAL)
13126

    
13127

    
13128
class LUTagsGet(TagsLU):
13129
  """Returns the tags of a given object.
13130

13131
  """
13132
  REQ_BGL = False
13133

    
13134
  def ExpandNames(self):
13135
    TagsLU.ExpandNames(self)
13136

    
13137
    # Share locks as this is only a read operation
13138
    self.share_locks = _ShareAll()
13139

    
13140
  def Exec(self, feedback_fn):
13141
    """Returns the tag list.
13142

13143
    """
13144
    return list(self.target.GetTags())
13145

    
13146

    
13147
class LUTagsSearch(NoHooksLU):
13148
  """Searches the tags for a given pattern.
13149

13150
  """
13151
  REQ_BGL = False
13152

    
13153
  def ExpandNames(self):
13154
    self.needed_locks = {}
13155

    
13156
  def CheckPrereq(self):
13157
    """Check prerequisites.
13158

13159
    This checks the pattern passed for validity by compiling it.
13160

13161
    """
13162
    try:
13163
      self.re = re.compile(self.op.pattern)
13164
    except re.error, err:
13165
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13166
                                 (self.op.pattern, err), errors.ECODE_INVAL)
13167

    
13168
  def Exec(self, feedback_fn):
13169
    """Returns the tag list.
13170

13171
    """
13172
    cfg = self.cfg
13173
    tgts = [("/cluster", cfg.GetClusterInfo())]
13174
    ilist = cfg.GetAllInstancesInfo().values()
13175
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13176
    nlist = cfg.GetAllNodesInfo().values()
13177
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13178
    tgts.extend(("/nodegroup/%s" % n.name, n)
13179
                for n in cfg.GetAllNodeGroupsInfo().values())
13180
    results = []
13181
    for path, target in tgts:
13182
      for tag in target.GetTags():
13183
        if self.re.search(tag):
13184
          results.append((path, tag))
13185
    return results
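
# LUTagsSearch.Exec returns (path, tag) pairs covering the cluster, instances,
# nodes and node groups. Below is an illustrative sketch of how a client could
# group such a result by object path; the name and sample data are made up.
def _ExampleGroupTagMatches(matches):
  """Groups (path, tag) pairs by path.

  @param matches: list of (path, tag) tuples, as returned by LUTagsSearch

  """
  grouped = {}
  for (path, tag) in matches:
    grouped.setdefault(path, []).append(tag)
  return grouped

# For example (hypothetical data):
#   _ExampleGroupTagMatches([("/instances/inst1", "web"),
#                            ("/instances/inst1", "prod")])
#   returns {"/instances/inst1": ["web", "prod"]}.
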

    
13187

    
13188
class LUTagsSet(TagsLU):
13189
  """Sets a tag on a given object.
13190

13191
  """
13192
  REQ_BGL = False
13193

    
13194
  def CheckPrereq(self):
13195
    """Check prerequisites.
13196

13197
    This checks the type and length of the tag name and value.
13198

13199
    """
13200
    TagsLU.CheckPrereq(self)
13201
    for tag in self.op.tags:
13202
      objects.TaggableObject.ValidateTag(tag)
13203

    
13204
  def Exec(self, feedback_fn):
13205
    """Sets the tag.
13206

13207
    """
13208
    try:
13209
      for tag in self.op.tags:
13210
        self.target.AddTag(tag)
13211
    except errors.TagError, err:
13212
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
13213
    self.cfg.Update(self.target, feedback_fn)
13214

    
13215

    
13216
class LUTagsDel(TagsLU):
13217
  """Delete a list of tags from a given object.
13218

13219
  """
13220
  REQ_BGL = False
13221

    
13222
  def CheckPrereq(self):
13223
    """Check prerequisites.
13224

13225
    This checks that we have the given tag.
13226

13227
    """
13228
    TagsLU.CheckPrereq(self)
13229
    for tag in self.op.tags:
13230
      objects.TaggableObject.ValidateTag(tag)
13231
    del_tags = frozenset(self.op.tags)
13232
    cur_tags = self.target.GetTags()
13233

    
13234
    diff_tags = del_tags - cur_tags
13235
    if diff_tags:
13236
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13237
      raise errors.OpPrereqError("Tag(s) %s not found" %
13238
                                 (utils.CommaJoin(diff_names), ),
13239
                                 errors.ECODE_NOENT)
13240

    
13241
  def Exec(self, feedback_fn):
13242
    """Remove the tag from the object.
13243

13244
    """
13245
    for tag in self.op.tags:
13246
      self.target.RemoveTag(tag)
13247
    self.cfg.Update(self.target, feedback_fn)
13248

    
13249

    
13250
class LUTestDelay(NoHooksLU):
13251
  """Sleep for a specified amount of time.
13252

13253
  This LU sleeps on the master and/or nodes for a specified amount of
13254
  time.
13255

13256
  """
13257
  REQ_BGL = False
13258

    
13259
  def ExpandNames(self):
13260
    """Expand names and set required locks.
13261

13262
    This expands the node list, if any.
13263

13264
    """
13265
    self.needed_locks = {}
13266
    if self.op.on_nodes:
13267
      # _GetWantedNodes can be used here, but is not always appropriate to use
13268
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13269
      # more information.
13270
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13271
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13272

    
13273
  def _TestDelay(self):
13274
    """Do the actual sleep.
13275

13276
    """
13277
    if self.op.on_master:
13278
      if not utils.TestDelay(self.op.duration):
13279
        raise errors.OpExecError("Error during master delay test")
13280
    if self.op.on_nodes:
13281
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13282
      for node, node_result in result.items():
13283
        node_result.Raise("Failure during rpc call to node %s" % node)
13284

    
13285
  def Exec(self, feedback_fn):
13286
    """Execute the test delay opcode, with the wanted repetitions.
13287

13288
    """
13289
    if self.op.repeat == 0:
13290
      self._TestDelay()
13291
    else:
13292
      top_value = self.op.repeat - 1
13293
      for i in range(self.op.repeat):
13294
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13295
        self._TestDelay()
13296

    
13297

    
13298
class LUTestJqueue(NoHooksLU):
13299
  """Utility LU to test some aspects of the job queue.
13300

13301
  """
13302
  REQ_BGL = False
13303

    
13304
  # Must be lower than default timeout for WaitForJobChange to see whether it
13305
  # notices changed jobs
13306
  _CLIENT_CONNECT_TIMEOUT = 20.0
13307
  _CLIENT_CONFIRM_TIMEOUT = 60.0
13308

    
13309
  @classmethod
13310
  def _NotifyUsingSocket(cls, cb, errcls):
13311
    """Opens a Unix socket and waits for another program to connect.
13312

13313
    @type cb: callable
13314
    @param cb: Callback to send socket name to client
13315
    @type errcls: class
13316
    @param errcls: Exception class to use for errors
13317

13318
    """
13319
    # Using a temporary directory as there's no easy way to create temporary
13320
    # sockets without writing a custom loop around tempfile.mktemp and
13321
    # socket.bind
13322
    tmpdir = tempfile.mkdtemp()
13323
    try:
13324
      tmpsock = utils.PathJoin(tmpdir, "sock")
13325

    
13326
      logging.debug("Creating temporary socket at %s", tmpsock)
13327
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13328
      try:
13329
        sock.bind(tmpsock)
13330
        sock.listen(1)
13331

    
13332
        # Send details to client
13333
        cb(tmpsock)
13334

    
13335
        # Wait for client to connect before continuing
13336
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13337
        try:
13338
          (conn, _) = sock.accept()
13339
        except socket.error, err:
13340
          raise errcls("Client didn't connect in time (%s)" % err)
13341
      finally:
13342
        sock.close()
13343
    finally:
13344
      # Remove as soon as client is connected
13345
      shutil.rmtree(tmpdir)
13346

    
13347
    # Wait for client to close
13348
    try:
13349
      try:
13350
        # pylint: disable=E1101
13351
        # Instance of '_socketobject' has no ... member
13352
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13353
        conn.recv(1)
13354
      except socket.error, err:
13355
        raise errcls("Client failed to confirm notification (%s)" % err)
13356
    finally:
13357
      conn.close()
13358

    
13359
  def _SendNotification(self, test, arg, sockname):
13360
    """Sends a notification to the client.
13361

13362
    @type test: string
13363
    @param test: Test name
13364
    @param arg: Test argument (depends on test)
13365
    @type sockname: string
13366
    @param sockname: Socket path
13367

13368
    """
13369
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13370

    
13371
  def _Notify(self, prereq, test, arg):
13372
    """Notifies the client of a test.
13373

13374
    @type prereq: bool
13375
    @param prereq: Whether this is a prereq-phase test
13376
    @type test: string
13377
    @param test: Test name
13378
    @param arg: Test argument (depends on test)
13379

13380
    """
13381
    if prereq:
13382
      errcls = errors.OpPrereqError
13383
    else:
13384
      errcls = errors.OpExecError
13385

    
13386
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13387
                                                  test, arg),
13388
                                   errcls)
13389

    
13390
  def CheckArguments(self):
13391
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13392
    self.expandnames_calls = 0
13393

    
13394
  def ExpandNames(self):
13395
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13396
    if checkargs_calls < 1:
13397
      raise errors.ProgrammerError("CheckArguments was not called")
13398

    
13399
    self.expandnames_calls += 1
13400

    
13401
    if self.op.notify_waitlock:
13402
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13403

    
13404
    self.LogInfo("Expanding names")
13405

    
13406
    # Get lock on master node (just to get a lock, not for a particular reason)
13407
    self.needed_locks = {
13408
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13409
      }
13410

    
13411
  def Exec(self, feedback_fn):
13412
    if self.expandnames_calls < 1:
13413
      raise errors.ProgrammerError("ExpandNames was not called")
13414

    
13415
    if self.op.notify_exec:
13416
      self._Notify(False, constants.JQT_EXEC, None)
13417

    
13418
    self.LogInfo("Executing")
13419

    
13420
    if self.op.log_messages:
13421
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13422
      for idx, msg in enumerate(self.op.log_messages):
13423
        self.LogInfo("Sending log message %s", idx + 1)
13424
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13425
        # Report how many test messages have been sent
13426
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13427

    
13428
    if self.op.fail:
13429
      raise errors.OpExecError("Opcode failure was requested")
13430

    
13431
    return True
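
# _NotifyUsingSocket above creates a Unix socket, publishes its path to the
# test client, waits for a connection and then for a single confirmation byte.
# The matching client side is not part of this module; the sketch below shows
# what such a client could look like (function name and timeout are
# illustrative, not an existing Ganeti API).
def _ExampleJqueueTestClient(sockname, timeout=10.0):
  """Connects to the notification socket and confirms receipt.

  @param sockname: path of the Unix socket published by the LU
  @param timeout: connect/send timeout in seconds

  """
  sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  sock.settimeout(timeout)
  try:
    sock.connect(sockname)
    # Any single byte acknowledges the notification (cf. conn.recv(1) above)
    sock.send("x")
  finally:
    sock.close()
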

    
13433

    
13434
class IAllocator(object):
13435
  """IAllocator framework.
13436

13437
  An IAllocator instance has four sets of attributes:
13438
    - cfg that is needed to query the cluster
13439
    - input data (all members of the _KEYS class attribute are required)
13440
    - four buffer attributes (in|out_data|text), that represent the
13441
      input (to the external script) in text and data structure format,
13442
      and the output from it, again in two formats
13443
    - the result variables from the script (success, info, nodes) for
13444
      easy usage
13445

13446
  """
13447
  # pylint: disable=R0902
13448
  # lots of instance attributes
13449

    
13450
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13451
    self.cfg = cfg
13452
    self.rpc = rpc_runner
13453
    # init buffer variables
13454
    self.in_text = self.out_text = self.in_data = self.out_data = None
13455
    # init all input fields so that pylint is happy
13456
    self.mode = mode
13457
    self.memory = self.disks = self.disk_template = None
13458
    self.os = self.tags = self.nics = self.vcpus = None
13459
    self.hypervisor = None
13460
    self.relocate_from = None
13461
    self.name = None
13462
    self.instances = None
13463
    self.evac_mode = None
13464
    self.target_groups = []
13465
    # computed fields
13466
    self.required_nodes = None
13467
    # init result fields
13468
    self.success = self.info = self.result = None
13469

    
13470
    try:
13471
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13472
    except KeyError:
13473
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13474
                                   " IAllocator" % self.mode)
13475

    
13476
    keyset = [n for (n, _) in keydata]
13477

    
13478
    for key in kwargs:
13479
      if key not in keyset:
13480
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13481
                                     " IAllocator" % key)
13482
      setattr(self, key, kwargs[key])
13483

    
13484
    for key in keyset:
13485
      if key not in kwargs:
13486
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13487
                                     " IAllocator" % key)
13488
    self._BuildInputData(compat.partial(fn, self), keydata)
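  # For example (purely illustrative values), an allocation request is
  # typically constructed like this, mirroring LUTestAllocator.Exec below:
  #
  #   ial = IAllocator(self.cfg, self.rpc,
  #                    mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com",
  #                    memory=2048,
  #                    disks=[{constants.IDISK_SIZE: 10240,
  #                            constants.IDISK_MODE: "rw"}],
  #                    disk_template=constants.DT_DRBD8,
  #                    os="debootstrap+default",
  #                    tags=[], nics=[{}], vcpus=2,
  #                    hypervisor=constants.HT_XEN_PVM)
  #
  # The keyword arguments must match the key list registered for the chosen
  # mode in _MODE_DATA (defined further down); missing or unknown keys raise
  # ProgrammerError in the loops above.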

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
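  # The resulting self.in_data is a plain dict whose top level looks roughly
  # like this (illustrative; the node and instance entries are built by the
  # helpers below):
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [],
  #    "enabled_hypervisors": ["xen-pvm"],
  #    "nodegroups": {...},   # from _ComputeNodeGroupData
  #    "nodes": {...},        # from _Compute{Basic,Dynamic}NodeData
  #    "instances": {...}}    # from _ComputeInstanceData
  #
  # _BuildInputData later adds the per-mode "request" key before serializing.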

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute static node data from the configuration.

    @rtype: dict
    @return: a dict mapping node name to a dict of node attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute dynamic node data.

    @param node_results: the basic node structures as filled from the config

    """
    # TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # assemble the dynamic node result
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
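  # Worked example for the free-memory adjustment above (illustrative
  # numbers): a primary instance with BE_MAXMEM of 4096 MiB that the
  # hypervisor reports as currently using 3072 MiB causes 1024 MiB to be
  # subtracted from the node's "free_memory", so the allocator plans against
  # the instance's maximum memory rather than its current (possibly
  # ballooned-down) usage; an instance not running at all counts with its
  # full BE_MAXMEM.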

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request
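  # For a two-disk DRBD instance the request built above looks roughly like
  # this (illustrative values; disk_space_total is whatever _ComputeDiskSize
  # returns for the template and disk list):
  #
  #   {"type": "allocate",          # added later by _BuildInputData
  #    "name": "inst1.example.com",
  #    "disk_template": "drbd",
  #    "tags": [],
  #    "os": "debootstrap+default",
  #    "vcpus": 2,
  #    "memory": 2048,
  #    "disks": [{"size": 10240, "mode": "rw"}, {"size": 2048, "mode": "rw"}],
  #    "disk_space_total": ...,     # from _ComputeDiskSize
  #    "nics": [{...}],
  #    "required_nodes": 2,         # DRBD needs a primary and a secondary
  #    "hypervisor": "xen-pvm"}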

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
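  # From this point on self.in_text holds the JSON document that is handed to
  # the external allocator script: the cluster description computed by
  # _ComputeClusterData plus a "request" entry whose "type" field is the mode
  # name (e.g. "allocate" or "relocate").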

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
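  # A node-evacuation or change-group reply that satisfies _NEVAC_RESULT is a
  # three-element list: moved instances, failed instances and jobs to submit.
  # Roughly (illustrative values):
  #
  #   [[["inst1.example.com", "group1", ["node3.example.com"]]],
  #    [["inst2.example.com", "not enough memory"]],
  #    [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]]]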

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
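  # A well-formed reply from the allocator script therefore looks roughly
  # like this (illustrative, for an "allocate" request):
  #
  #   {"success": true,
  #    "info": "allocation successful",
  #    "result": ["node2.example.com", "node3.example.com"]}
  #
  # Legacy scripts that return "nodes" instead of "result" are still accepted
  # through the backwards-compatibility branch above.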

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
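  # Example (illustrative): with node2group = {"node1": "uuid-a"} and
  # groups = {"uuid-a": {"name": "default"}}, calling
  # _NodesToGroups(node2group, groups, ["node1", "unknown-node"]) returns
  # ["default"]; unknown nodes are skipped and unknown group UUIDs are
  # returned verbatim in place of a name.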


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
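# For example, _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class defined earlier in this module, while an unknown resource
# name such as "foo" raises OpPrereqError with ECODE_INVAL.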