1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70
# States of instance
71
INSTANCE_UP = [constants.ADMINST_UP]
72
INSTANCE_DOWN = [constants.ADMINST_DOWN]
73
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
76

    
77

    
78
class ResultWithJobs:
79
  """Data container for LU results with jobs.
80

81
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83
  contained in the C{jobs} attribute and include the job IDs in the opcode
84
  result.
85

86
  """
87
  def __init__(self, jobs, **kwargs):
88
    """Initializes this class.
89

90
    Additional return values can be specified as keyword arguments.
91

92
    @type jobs: list of lists of L{opcodes.OpCode}
93
    @param jobs: A list of lists of opcode objects
94

95
    """
96
    self.jobs = jobs
97
    self.other = kwargs
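
# Usage sketch (illustrative, not part of the module): an LU that wants
# follow-up work submitted to the job queue can return ResultWithJobs from its
# Exec method; each inner list becomes one job. OpTestDelay is used here only
# as a convenient stand-in opcode.
#
#   def Exec(self, feedback_fn):
#     jobs = [
#       [opcodes.OpTestDelay(duration=1)],   # first job: one opcode
#       [opcodes.OpTestDelay(duration=1),
#        opcodes.OpTestDelay(duration=2)],   # second job: two opcodes in order
#       ]
#     return ResultWithJobs(jobs, custom_key="extra values end up in .other")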
98

    
99

    
100
class LogicalUnit(object):
101
  """Logical Unit base class.
102

103
  Subclasses must follow these rules:
104
    - implement ExpandNames
105
    - implement CheckPrereq (except when tasklets are used)
106
    - implement Exec (except when tasklets are used)
107
    - implement BuildHooksEnv
108
    - implement BuildHooksNodes
109
    - redefine HPATH and HTYPE
110
    - optionally redefine their run requirements:
111
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112

113
  Note that all commands require root permissions.
114

115
  @ivar dry_run_result: the value (if any) that will be returned to the caller
116
      in dry-run mode (signalled by opcode dry_run parameter)
117

118
  """
119
  HPATH = None
120
  HTYPE = None
121
  REQ_BGL = True
122

    
123
  def __init__(self, processor, op, context, rpc_runner):
124
    """Constructor for LogicalUnit.
125

126
    This needs to be overridden in derived classes in order to check op
127
    validity.
128

129
    """
130
    self.proc = processor
131
    self.op = op
132
    self.cfg = context.cfg
133
    self.glm = context.glm
134
    # readability alias
135
    self.owned_locks = context.glm.list_owned
136
    self.context = context
137
    self.rpc = rpc_runner
138
    # Dicts used to declare locking needs to mcpu
139
    self.needed_locks = None
140
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
141
    self.add_locks = {}
142
    self.remove_locks = {}
143
    # Used to force good behavior when calling helper functions
144
    self.recalculate_locks = {}
145
    # logging
146
    self.Log = processor.Log # pylint: disable=C0103
147
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
148
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
149
    self.LogStep = processor.LogStep # pylint: disable=C0103
150
    # support for dry-run
151
    self.dry_run_result = None
152
    # support for generic debug attribute
153
    if (not hasattr(self.op, "debug_level") or
154
        not isinstance(self.op.debug_level, int)):
155
      self.op.debug_level = 0
156

    
157
    # Tasklets
158
    self.tasklets = None
159

    
160
    # Validate opcode parameters and set defaults
161
    self.op.Validate(True)
162

    
163
    self.CheckArguments()
164

    
165
  def CheckArguments(self):
166
    """Check syntactic validity for the opcode arguments.
167

168
    This method is for doing a simple syntactic check, ensuring the
169
    validity of opcode parameters, without any cluster-related
170
    checks. While the same can be accomplished in ExpandNames and/or
171
    CheckPrereq, doing these separately is better because:
172

173
      - ExpandNames is left as a purely lock-related function
174
      - CheckPrereq is run after we have acquired locks (and possibly
175
        waited for them)
176

177
    The function is allowed to change the self.op attribute so that
178
    later methods need not worry about missing parameters.
179

180
    """
181
    pass
182

    
183
  def ExpandNames(self):
184
    """Expand names for this LU.
185

186
    This method is called before starting to execute the opcode, and it should
187
    update all the parameters of the opcode to their canonical form (e.g. a
188
    short node name must be fully expanded after this method has successfully
189
    completed). This way locking, hooks, logging, etc. can work correctly.
190

191
    LUs which implement this method must also populate the self.needed_locks
192
    member, as a dict with lock levels as keys, and a list of needed lock names
193
    as values. Rules:
194

195
      - use an empty dict if you don't need any lock
196
      - if you don't need any lock at a particular level omit that level
197
      - don't put anything for the BGL level
198
      - if you want all locks at a level use locking.ALL_SET as a value
199

200
    If you need to share locks (rather than acquire them exclusively) at one
201
    level you can modify self.share_locks, setting a true value (usually 1) for
202
    that level. By default locks are not shared.
203

204
    This function can also define a list of tasklets, which then will be
205
    executed in order instead of the usual LU-level CheckPrereq and Exec
206
    functions, if those are not defined by the LU.
207

208
    Examples::
209

210
      # Acquire all nodes and one instance
211
      self.needed_locks = {
212
        locking.LEVEL_NODE: locking.ALL_SET,
213
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
214
      }
215
      # Acquire just two nodes
216
      self.needed_locks = {
217
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
218
      }
219
      # Acquire no locks
220
      self.needed_locks = {} # No, you can't leave it to the default value None
221

222
    """
223
    # The implementation of this method is mandatory only if the new LU is
224
    # concurrent, so that old LUs don't need to be changed all at the same
225
    # time.
226
    if self.REQ_BGL:
227
      self.needed_locks = {} # Exclusive LUs don't need locks.
228
    else:
229
      raise NotImplementedError
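
  # Sketch of a concurrent (REQ_BGL = False) LU's ExpandNames, assuming a
  # hypothetical LU class; it declares node locks and shares them so that
  # other LUs reading the same nodes can run in parallel:
  #
  #   class LUFooExample(NoHooksLU):      # hypothetical, for illustration
  #     REQ_BGL = False
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {
  #         locking.LEVEL_NODE: locking.ALL_SET,
  #         }
  #       self.share_locks[locking.LEVEL_NODE] = 1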
230

    
231
  def DeclareLocks(self, level):
232
    """Declare LU locking needs for a level
233

234
    While most LUs can just declare their locking needs at ExpandNames time,
235
    sometimes there's the need to calculate some locks after having acquired
236
    the ones before. This function is called just before acquiring locks at a
237
    particular level, but after acquiring the ones at lower levels, and permits
238
    such calculations. It can be used to modify self.needed_locks, and by
239
    default it does nothing.
240

241
    This function is only called if you have something already set in
242
    self.needed_locks for the level.
243

244
    @param level: Locking level which is going to be locked
245
    @type level: member of ganeti.locking.LEVELS
246

247
    """
248

    
249
  def CheckPrereq(self):
250
    """Check prerequisites for this LU.
251

252
    This method should check that the prerequisites for the execution
253
    of this LU are fulfilled. It can do internode communication, but
254
    it should be idempotent - no cluster or system changes are
255
    allowed.
256

257
    The method should raise errors.OpPrereqError in case something is
258
    not fulfilled. Its return value is ignored.
259

260
    This method should also update all the parameters of the opcode to
261
    their canonical form if it hasn't been done by ExpandNames before.
262

263
    """
264
    if self.tasklets is not None:
265
      for (idx, tl) in enumerate(self.tasklets):
266
        logging.debug("Checking prerequisites for tasklet %s/%s",
267
                      idx + 1, len(self.tasklets))
268
        tl.CheckPrereq()
269
    else:
270
      pass
271

    
272
  def Exec(self, feedback_fn):
273
    """Execute the LU.
274

275
    This method should implement the actual work. It should raise
276
    errors.OpExecError for failures that are somewhat dealt with in
277
    code, or expected.
278

279
    """
280
    if self.tasklets is not None:
281
      for (idx, tl) in enumerate(self.tasklets):
282
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
283
        tl.Exec(feedback_fn)
284
    else:
285
      raise NotImplementedError
286

    
287
  def BuildHooksEnv(self):
288
    """Build hooks environment for this LU.
289

290
    @rtype: dict
291
    @return: Dictionary containing the environment that will be used for
292
      running the hooks for this LU. The keys of the dict must not be prefixed
293
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294
      will extend the environment with additional variables. If no environment
295
      should be defined, an empty dictionary should be returned (not C{None}).
296
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
297
      will not be called.
298

299
    """
300
    raise NotImplementedError
301

    
302
  def BuildHooksNodes(self):
303
    """Build list of nodes to run LU's hooks.
304

305
    @rtype: tuple; (list, list)
306
    @return: Tuple containing a list of node names on which the hook
307
      should run before the execution and a list of node names on which the
308
      hook should run after the execution. If there are no nodes, an empty
309
      list must be returned (and not None).
310
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311
      will not be called.
312

313
    """
314
    raise NotImplementedError
315

    
316
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317
    """Notify the LU about the results of its hooks.
318

319
    This method is called every time a hooks phase is executed, and notifies
320
    the Logical Unit about the hooks' result. The LU can then use it to alter
321
    its result based on the hooks.  By default the method does nothing and the
322
    previous result is passed back unchanged but any LU can define it if it
323
    wants to use the local cluster hook-scripts somehow.
324

325
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
326
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327
    @param hook_results: the results of the multi-node hooks rpc call
328
    @param feedback_fn: function used send feedback back to the caller
329
    @param lu_result: the previous Exec result this LU had, or None
330
        in the PRE phase
331
    @return: the new Exec result, based on the previous result
332
        and hook results
333

334
    """
335
    # The API must be kept, thus we ignore the unused-argument and
336
    # could-be-a-function warnings
337
    # pylint: disable=W0613,R0201
338
    return lu_result
339

    
340
  def _ExpandAndLockInstance(self):
341
    """Helper function to expand and lock an instance.
342

343
    Many LUs that work on an instance take its name in self.op.instance_name
344
    and need to expand it and then declare the expanded name for locking. This
345
    function does it, and then updates self.op.instance_name to the expanded
346
    name. It also initializes needed_locks as a dict, if this hasn't been done
347
    before.
348

349
    """
350
    if self.needed_locks is None:
351
      self.needed_locks = {}
352
    else:
353
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354
        "_ExpandAndLockInstance called with instance-level locks set"
355
    self.op.instance_name = _ExpandInstanceName(self.cfg,
356
                                                self.op.instance_name)
357
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
358

    
359
  def _LockInstancesNodes(self, primary_only=False,
360
                          level=locking.LEVEL_NODE):
361
    """Helper function to declare instances' nodes for locking.
362

363
    This function should be called after locking one or more instances to lock
364
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365
    with all primary or secondary nodes for instances already locked and
366
    present in self.needed_locks[locking.LEVEL_INSTANCE].
367

368
    It should be called from DeclareLocks, and for safety only works if
369
    self.recalculate_locks[locking.LEVEL_NODE] is set.
370

371
    In the future it may grow parameters to just lock some instance's nodes, or
372
    to just lock primaries or secondary nodes, if needed.
373

374
    It should be called in DeclareLocks in a way similar to::
375

376
      if level == locking.LEVEL_NODE:
377
        self._LockInstancesNodes()
378

379
    @type primary_only: boolean
380
    @param primary_only: only lock primary nodes of locked instances
381
    @param level: Which lock level to use for locking nodes
382

383
    """
384
    assert level in self.recalculate_locks, \
385
      "_LockInstancesNodes helper function called with no nodes to recalculate"
386

    
387
    # TODO: check if we've really been called with the instance locks held
388

    
389
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390
    # future we might want to have different behaviors depending on the value
391
    # of self.recalculate_locks[locking.LEVEL_NODE]
392
    wanted_nodes = []
393
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395
      wanted_nodes.append(instance.primary_node)
396
      if not primary_only:
397
        wanted_nodes.extend(instance.secondary_nodes)
398

    
399
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400
      self.needed_locks[level] = wanted_nodes
401
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402
      self.needed_locks[level].extend(wanted_nodes)
403
    else:
404
      raise errors.ProgrammerError("Unknown recalculation mode")
405

    
406
    del self.recalculate_locks[level]
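
  # Typical pattern (sketch, assuming a hypothetical instance-level LU): the
  # instance lock is declared in ExpandNames, the node locks are recalculated
  # here once the instance lock is held:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()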
407

    
408

    
409
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410
  """Simple LU which runs no hooks.
411

412
  This LU is intended as a parent for other LogicalUnits which will
413
  run no hooks, in order to reduce duplicate code.
414

415
  """
416
  HPATH = None
417
  HTYPE = None
418

    
419
  def BuildHooksEnv(self):
420
    """Empty BuildHooksEnv for NoHooksLu.
421

422
    This just raises an error.
423

424
    """
425
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
426

    
427
  def BuildHooksNodes(self):
428
    """Empty BuildHooksNodes for NoHooksLU.
429

430
    """
431
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
432

    
433

    
434
class Tasklet:
435
  """Tasklet base class.
436

437
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
439
  tasklets know nothing about locks.
440

441
  Subclasses must follow these rules:
442
    - Implement CheckPrereq
443
    - Implement Exec
444

445
  """
446
  def __init__(self, lu):
447
    self.lu = lu
448

    
449
    # Shortcuts
450
    self.cfg = lu.cfg
451
    self.rpc = lu.rpc
452

    
453
  def CheckPrereq(self):
454
    """Check prerequisites for this tasklets.
455

456
    This method should check whether the prerequisites for the execution of
457
    this tasklet are fulfilled. It can do internode communication, but it
458
    should be idempotent - no cluster or system changes are allowed.
459

460
    The method should raise errors.OpPrereqError in case something is not
461
    fulfilled. Its return value is ignored.
462

463
    This method should also update all parameters to their canonical form if it
464
    hasn't been done before.
465

466
    """
467
    pass
468

    
469
  def Exec(self, feedback_fn):
470
    """Execute the tasklet.
471

472
    This method should implement the actual work. It should raise
473
    errors.OpExecError for failures that are somewhat dealt with in code, or
474
    expected.
475

476
    """
477
    raise NotImplementedError
478

    
479

    
480
class _QueryBase:
481
  """Base for query utility classes.
482

483
  """
484
  #: Attribute holding field definitions
485
  FIELDS = None
486

    
487
  def __init__(self, qfilter, fields, use_locking):
488
    """Initializes this class.
489

490
    """
491
    self.use_locking = use_locking
492

    
493
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
494
                             namefield="name")
495
    self.requested_data = self.query.RequestedData()
496
    self.names = self.query.RequestedNames()
497

    
498
    # Sort only if no names were requested
499
    self.sort_by_name = not self.names
500

    
501
    self.do_locking = None
502
    self.wanted = None
503

    
504
  def _GetNames(self, lu, all_names, lock_level):
505
    """Helper function to determine names asked for in the query.
506

507
    """
508
    if self.do_locking:
509
      names = lu.owned_locks(lock_level)
510
    else:
511
      names = all_names
512

    
513
    if self.wanted == locking.ALL_SET:
514
      assert not self.names
515
      # caller didn't specify names, so ordering is not important
516
      return utils.NiceSort(names)
517

    
518
    # caller specified names and we must keep the same order
519
    assert self.names
520
    assert not self.do_locking or lu.glm.is_owned(lock_level)
521

    
522
    missing = set(self.wanted).difference(names)
523
    if missing:
524
      raise errors.OpExecError("Some items were removed before retrieving"
525
                               " their data: %s" % missing)
526

    
527
    # Return expanded names
528
    return self.wanted
529

    
530
  def ExpandNames(self, lu):
531
    """Expand names for this query.
532

533
    See L{LogicalUnit.ExpandNames}.
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def DeclareLocks(self, lu, level):
539
    """Declare locks for this query.
540

541
    See L{LogicalUnit.DeclareLocks}.
542

543
    """
544
    raise NotImplementedError()
545

    
546
  def _GetQueryData(self, lu):
547
    """Collects all data for this query.
548

549
    @return: Query data object
550

551
    """
552
    raise NotImplementedError()
553

    
554
  def NewStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559
                                  sort_by_name=self.sort_by_name)
560

    
561
  def OldStyleQuery(self, lu):
562
    """Collect data and execute query.
563

564
    """
565
    return self.query.OldStyleQuery(self._GetQueryData(lu),
566
                                    sort_by_name=self.sort_by_name)
567

    
568

    
569
def _ShareAll():
570
  """Returns a dict declaring all lock levels shared.
571

572
  """
573
  return dict.fromkeys(locking.LEVELS, 1)
574

    
575

    
576
def _MakeLegacyNodeInfo(data):
577
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
578

579
  Converts the data into a single dictionary. This is fine for most use cases,
580
  but some require information from more than one volume group or hypervisor.
581

582
  """
583
  (bootid, (vg_info, ), (hv_info, )) = data
584

    
585
  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
586
    "bootid": bootid,
587
    })
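
# Shape sketch (illustrative values; the exact keys come from the node_info
# RPC payload): the tuple unpacked above carries the boot ID plus one dict per
# volume group and hypervisor, which get merged into a flat dict:
#
#   data = ("f9e1...-boot-id",
#           ({"vg_size": 102400, "vg_free": 51200},),
#           ({"memory_total": 4096, "memory_free": 1024},))
#   _MakeLegacyNodeInfo(data)
#   # => {"vg_size": 102400, "vg_free": 51200,
#   #     "memory_total": 4096, "memory_free": 1024,
#   #     "bootid": "f9e1...-boot-id"}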
588

    
589

    
590
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591
  """Checks if the owned node groups are still correct for an instance.
592

593
  @type cfg: L{config.ConfigWriter}
594
  @param cfg: The cluster configuration
595
  @type instance_name: string
596
  @param instance_name: Instance name
597
  @type owned_groups: set or frozenset
598
  @param owned_groups: List of currently owned node groups
599

600
  """
601
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
602

    
603
  if not owned_groups.issuperset(inst_groups):
604
    raise errors.OpPrereqError("Instance %s's node groups changed since"
605
                               " locks were acquired, current groups are"
606
                               " are '%s', owning groups '%s'; retry the"
607
                               " operation" %
608
                               (instance_name,
609
                                utils.CommaJoin(inst_groups),
610
                                utils.CommaJoin(owned_groups)),
611
                               errors.ECODE_STATE)
612

    
613
  return inst_groups
614

    
615

    
616
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617
  """Checks if the instances in a node group are still correct.
618

619
  @type cfg: L{config.ConfigWriter}
620
  @param cfg: The cluster configuration
621
  @type group_uuid: string
622
  @param group_uuid: Node group UUID
623
  @type owned_instances: set or frozenset
624
  @param owned_instances: List of currently owned instances
625

626
  """
627
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628
  if owned_instances != wanted_instances:
629
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
630
                               " locks were acquired, wanted '%s', have '%s';"
631
                               " retry the operation" %
632
                               (group_uuid,
633
                                utils.CommaJoin(wanted_instances),
634
                                utils.CommaJoin(owned_instances)),
635
                               errors.ECODE_STATE)
636

    
637
  return wanted_instances
638

    
639

    
640
def _SupportsOob(cfg, node):
641
  """Tells if node supports OOB.
642

643
  @type cfg: L{config.ConfigWriter}
644
  @param cfg: The cluster configuration
645
  @type node: L{objects.Node}
646
  @param node: The node
647
  @return: The OOB script if supported or an empty string otherwise
648

649
  """
650
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
651

    
652

    
653
def _GetWantedNodes(lu, nodes):
654
  """Returns list of checked and expanded node names.
655

656
  @type lu: L{LogicalUnit}
657
  @param lu: the logical unit on whose behalf we execute
658
  @type nodes: list
659
  @param nodes: list of node names or None for all nodes
660
  @rtype: list
661
  @return: the list of nodes, sorted
662
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
663

664
  """
665
  if nodes:
666
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
667

    
668
  return utils.NiceSort(lu.cfg.GetNodeList())
669

    
670

    
671
def _GetWantedInstances(lu, instances):
672
  """Returns list of checked and expanded instance names.
673

674
  @type lu: L{LogicalUnit}
675
  @param lu: the logical unit on whose behalf we execute
676
  @type instances: list
677
  @param instances: list of instance names or None for all instances
678
  @rtype: list
679
  @return: the list of instances, sorted
680
  @raise errors.OpPrereqError: if the instances parameter is wrong type
681
  @raise errors.OpPrereqError: if any of the passed instances is not found
682

683
  """
684
  if instances:
685
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
686
  else:
687
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
688
  return wanted
689

    
690

    
691
def _GetUpdatedParams(old_params, update_dict,
692
                      use_default=True, use_none=False):
693
  """Return the new version of a parameter dictionary.
694

695
  @type old_params: dict
696
  @param old_params: old parameters
697
  @type update_dict: dict
698
  @param update_dict: dict containing new parameter values, or
699
      constants.VALUE_DEFAULT to reset the parameter to its default
700
      value
701
  @type use_default: boolean
702
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
703
      values as 'to be deleted' values
704
  @type use_none: boolean
705
  @param use_none: whether to recognise C{None} values as 'to be
706
      deleted' values
707
  @rtype: dict
708
  @return: the new parameter dictionary
709

710
  """
711
  params_copy = copy.deepcopy(old_params)
712
  for key, val in update_dict.iteritems():
713
    if ((use_default and val == constants.VALUE_DEFAULT) or
714
        (use_none and val is None)):
715
      try:
716
        del params_copy[key]
717
      except KeyError:
718
        pass
719
    else:
720
      params_copy[key] = val
721
  return params_copy
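
# Example (values are made up): resetting one parameter to its default while
# setting another; VALUE_DEFAULT entries delete the key, everything else is
# stored as-is.
#
#   old = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/xvda1"}
#   upd = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
#   _GetUpdatedParams(old, upd)
#   # => {"root_path": "/dev/xvda1", "serial_console": True}
#
# With use_none=True, a C{None} value behaves like VALUE_DEFAULT and removes
# the key instead of storing None.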
722

    
723

    
724
def _UpdateAndVerifySubDict(base, updates, type_check):
725
  """Updates and verifies a dict with sub dicts of the same type.
726

727
  @param base: The dict with the old data
728
  @param updates: The dict with the new data
729
  @param type_check: Dict suitable to ForceDictType to verify correct types
730
  @returns: A new dict with updated and verified values
731

732
  """
733
  def fn(old, value):
734
    new = _GetUpdatedParams(old, value)
735
    utils.ForceDictType(new, type_check)
736
    return new
737

    
738
  ret = copy.deepcopy(base)
739
  ret.update(dict((key, fn(base.get(key, {}), value))
740
                  for key, value in updates.items()))
741
  return ret
742

    
743

    
744
def _ReleaseLocks(lu, level, names=None, keep=None):
745
  """Releases locks owned by an LU.
746

747
  @type lu: L{LogicalUnit}
748
  @param level: Lock level
749
  @type names: list or None
750
  @param names: Names of locks to release
751
  @type keep: list or None
752
  @param keep: Names of locks to retain
753

754
  """
755
  assert not (keep is not None and names is not None), \
756
         "Only one of the 'names' and the 'keep' parameters can be given"
757

    
758
  if names is not None:
759
    should_release = names.__contains__
760
  elif keep:
761
    should_release = lambda name: name not in keep
762
  else:
763
    should_release = None
764

    
765
  owned = lu.owned_locks(level)
766
  if not owned:
767
    # Not owning any lock at this level, do nothing
768
    pass
769

    
770
  elif should_release:
771
    retain = []
772
    release = []
773

    
774
    # Determine which locks to release
775
    for name in owned:
776
      if should_release(name):
777
        release.append(name)
778
      else:
779
        retain.append(name)
780

    
781
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
782

    
783
    # Release just some locks
784
    lu.glm.release(level, names=release)
785

    
786
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
787
  else:
788
    # Release everything
789
    lu.glm.release(level)
790

    
791
    assert not lu.glm.is_owned(level), "No locks should be owned"
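
# Usage sketch (hypothetical call sites): once an LU has narrowed down the
# nodes it actually needs, surplus node locks can be dropped early, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[instance.primary_node])
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unneeded_node_names)
#
# Passing both names= and keep= trips the assertion at the top of the
# function.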
792

    
793

    
794
def _MapInstanceDisksToNodes(instances):
795
  """Creates a map from (node, volume) to instance name.
796

797
  @type instances: list of L{objects.Instance}
798
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
799

800
  """
801
  return dict(((node, vol), inst.name)
802
              for inst in instances
803
              for (node, vols) in inst.MapLVsByNode().items()
804
              for vol in vols)
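
# Result shape sketch (names are invented): for an instance with one LV on
# each of two nodes the mapping looks like
#
#   {("node1.example.com", "xenvg/0c3f...disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/0c3f...disk0"): "inst1.example.com"}
#
# i.e. each (node name, volume name) pair points back at the owning instance.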
805

    
806

    
807
def _RunPostHook(lu, node_name):
808
  """Runs the post-hook for an opcode on a single node.
809

810
  """
811
  hm = lu.proc.BuildHooksManager(lu)
812
  try:
813
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
814
  except:
815
    # pylint: disable=W0702
816
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
817

    
818

    
819
def _CheckOutputFields(static, dynamic, selected):
820
  """Checks whether all selected fields are valid.
821

822
  @type static: L{utils.FieldSet}
823
  @param static: static fields set
824
  @type dynamic: L{utils.FieldSet}
825
  @param dynamic: dynamic fields set
826

827
  """
828
  f = utils.FieldSet()
829
  f.Extend(static)
830
  f.Extend(dynamic)
831

    
832
  delta = f.NonMatching(selected)
833
  if delta:
834
    raise errors.OpPrereqError("Unknown output fields selected: %s"
835
                               % ",".join(delta), errors.ECODE_INVAL)
836

    
837

    
838
def _CheckGlobalHvParams(params):
839
  """Validates that given hypervisor params are not global ones.
840

841
  This will ensure that instances don't get customised versions of
842
  global params.
843

844
  """
845
  used_globals = constants.HVC_GLOBALS.intersection(params)
846
  if used_globals:
847
    msg = ("The following hypervisor parameters are global and cannot"
848
           " be customized at instance level, please modify them at"
849
           " cluster level: %s" % utils.CommaJoin(used_globals))
850
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
851

    
852

    
853
def _CheckNodeOnline(lu, node, msg=None):
854
  """Ensure that a given node is online.
855

856
  @param lu: the LU on behalf of which we make the check
857
  @param node: the node to check
858
  @param msg: if passed, should be a message to replace the default one
859
  @raise errors.OpPrereqError: if the node is offline
860

861
  """
862
  if msg is None:
863
    msg = "Can't use offline node"
864
  if lu.cfg.GetNodeInfo(node).offline:
865
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
866

    
867

    
868
def _CheckNodeNotDrained(lu, node):
869
  """Ensure that a given node is not drained.
870

871
  @param lu: the LU on behalf of which we make the check
872
  @param node: the node to check
873
  @raise errors.OpPrereqError: if the node is drained
874

875
  """
876
  if lu.cfg.GetNodeInfo(node).drained:
877
    raise errors.OpPrereqError("Can't use drained node %s" % node,
878
                               errors.ECODE_STATE)
879

    
880

    
881
def _CheckNodeVmCapable(lu, node):
882
  """Ensure that a given node is vm capable.
883

884
  @param lu: the LU on behalf of which we make the check
885
  @param node: the node to check
886
  @raise errors.OpPrereqError: if the node is not vm capable
887

888
  """
889
  if not lu.cfg.GetNodeInfo(node).vm_capable:
890
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
891
                               errors.ECODE_STATE)
892

    
893

    
894
def _CheckNodeHasOS(lu, node, os_name, force_variant):
895
  """Ensure that a node supports a given OS.
896

897
  @param lu: the LU on behalf of which we make the check
898
  @param node: the node to check
899
  @param os_name: the OS to query about
900
  @param force_variant: whether to ignore variant errors
901
  @raise errors.OpPrereqError: if the node is not supporting the OS
902

903
  """
904
  result = lu.rpc.call_os_get(node, os_name)
905
  result.Raise("OS '%s' not in supported OS list for node %s" %
906
               (os_name, node),
907
               prereq=True, ecode=errors.ECODE_INVAL)
908
  if not force_variant:
909
    _CheckOSVariant(result.payload, os_name)
910

    
911

    
912
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
913
  """Ensure that a node has the given secondary ip.
914

915
  @type lu: L{LogicalUnit}
916
  @param lu: the LU on behalf of which we make the check
917
  @type node: string
918
  @param node: the node to check
919
  @type secondary_ip: string
920
  @param secondary_ip: the ip to check
921
  @type prereq: boolean
922
  @param prereq: whether to throw a prerequisite or an execute error
923
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
924
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
925

926
  """
927
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
928
  result.Raise("Failure checking secondary ip on node %s" % node,
929
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
930
  if not result.payload:
931
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
932
           " please fix and re-run this command" % secondary_ip)
933
    if prereq:
934
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
935
    else:
936
      raise errors.OpExecError(msg)
937

    
938

    
939
def _GetClusterDomainSecret():
940
  """Reads the cluster domain secret.
941

942
  """
943
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
944
                               strict=True)
945

    
946

    
947
def _CheckInstanceState(lu, instance, req_states, msg=None):
948
  """Ensure that an instance is in one of the required states.
949

950
  @param lu: the LU on behalf of which we make the check
951
  @param instance: the instance to check
952
  @param msg: if passed, should be a message to replace the default one
953
  @raise errors.OpPrereqError: if the instance is not in the required state
954

955
  """
956
  if msg is None:
957
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
958
  if instance.admin_state not in req_states:
959
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
960
                               (instance, instance.admin_state, msg),
961
                               errors.ECODE_STATE)
962

    
963
  if constants.ADMINST_UP not in req_states:
964
    pnode = instance.primary_node
965
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
966
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
967
                prereq=True, ecode=errors.ECODE_ENVIRON)
968

    
969
    if instance.name in ins_l.payload:
970
      raise errors.OpPrereqError("Instance %s is running, %s" %
971
                                 (instance.name, msg), errors.ECODE_STATE)
972

    
973

    
974
def _ExpandItemName(fn, name, kind):
975
  """Expand an item name.
976

977
  @param fn: the function to use for expansion
978
  @param name: requested item name
979
  @param kind: text description ('Node' or 'Instance')
980
  @return: the resolved (full) name
981
  @raise errors.OpPrereqError: if the item is not found
982

983
  """
984
  full_name = fn(name)
985
  if full_name is None:
986
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
987
                               errors.ECODE_NOENT)
988
  return full_name
989

    
990

    
991
def _ExpandNodeName(cfg, name):
992
  """Wrapper over L{_ExpandItemName} for nodes."""
993
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
994

    
995

    
996
def _ExpandInstanceName(cfg, name):
997
  """Wrapper over L{_ExpandItemName} for instance."""
998
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
999

    
1000

    
1001
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
1002
                          minmem, maxmem, vcpus, nics, disk_template, disks,
1003
                          bep, hvp, hypervisor_name, tags):
1004
  """Builds instance related env variables for hooks
1005

1006
  This builds the hook environment from individual variables.
1007

1008
  @type name: string
1009
  @param name: the name of the instance
1010
  @type primary_node: string
1011
  @param primary_node: the name of the instance's primary node
1012
  @type secondary_nodes: list
1013
  @param secondary_nodes: list of secondary nodes as strings
1014
  @type os_type: string
1015
  @param os_type: the name of the instance's OS
1016
  @type status: string
1017
  @param status: the desired status of the instance
1018
  @type minmem: string
1019
  @param minmem: the minimum memory size of the instance
1020
  @type maxmem: string
1021
  @param maxmem: the maximum memory size of the instance
1022
  @type vcpus: string
1023
  @param vcpus: the count of VCPUs the instance has
1024
  @type nics: list
1025
  @param nics: list of tuples (ip, mac, mode, link) representing
1026
      the NICs the instance has
1027
  @type disk_template: string
1028
  @param disk_template: the disk template of the instance
1029
  @type disks: list
1030
  @param disks: the list of (size, mode) pairs
1031
  @type bep: dict
1032
  @param bep: the backend parameters for the instance
1033
  @type hvp: dict
1034
  @param hvp: the hypervisor parameters for the instance
1035
  @type hypervisor_name: string
1036
  @param hypervisor_name: the hypervisor for the instance
1037
  @type tags: list
1038
  @param tags: list of instance tags as strings
1039
  @rtype: dict
1040
  @return: the hook environment for this instance
1041

1042
  """
1043
  env = {
1044
    "OP_TARGET": name,
1045
    "INSTANCE_NAME": name,
1046
    "INSTANCE_PRIMARY": primary_node,
1047
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1048
    "INSTANCE_OS_TYPE": os_type,
1049
    "INSTANCE_STATUS": status,
1050
    "INSTANCE_MINMEM": minmem,
1051
    "INSTANCE_MAXMEM": maxmem,
1052
    # TODO(2.7) remove deprecated "memory" value
1053
    "INSTANCE_MEMORY": maxmem,
1054
    "INSTANCE_VCPUS": vcpus,
1055
    "INSTANCE_DISK_TEMPLATE": disk_template,
1056
    "INSTANCE_HYPERVISOR": hypervisor_name,
1057
  }
1058
  if nics:
1059
    nic_count = len(nics)
1060
    for idx, (ip, mac, mode, link) in enumerate(nics):
1061
      if ip is None:
1062
        ip = ""
1063
      env["INSTANCE_NIC%d_IP" % idx] = ip
1064
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1065
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1066
      env["INSTANCE_NIC%d_LINK" % idx] = link
1067
      if mode == constants.NIC_MODE_BRIDGED:
1068
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1069
  else:
1070
    nic_count = 0
1071

    
1072
  env["INSTANCE_NIC_COUNT"] = nic_count
1073

    
1074
  if disks:
1075
    disk_count = len(disks)
1076
    for idx, (size, mode) in enumerate(disks):
1077
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1078
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1079
  else:
1080
    disk_count = 0
1081

    
1082
  env["INSTANCE_DISK_COUNT"] = disk_count
1083

    
1084
  if not tags:
1085
    tags = []
1086

    
1087
  env["INSTANCE_TAGS"] = " ".join(tags)
1088

    
1089
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1090
    for key, value in source.items():
1091
      env["INSTANCE_%s_%s" % (kind, key)] = value
1092

    
1093
  return env
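
# Resulting environment sketch: for a single-NIC, single-disk instance the
# dict built above contains entries such as INSTANCE_NAME, INSTANCE_PRIMARY,
# INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC, INSTANCE_DISK0_SIZE and one
# INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor parameter. The
# hooks runner prefixes each key with "GANETI_", so a hook script reads e.g.
# $GANETI_INSTANCE_PRIMARY.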
1094

    
1095

    
1096
def _NICListToTuple(lu, nics):
1097
  """Build a list of nic information tuples.
1098

1099
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1100
  value in LUInstanceQueryData.
1101

1102
  @type lu:  L{LogicalUnit}
1103
  @param lu: the logical unit on whose behalf we execute
1104
  @type nics: list of L{objects.NIC}
1105
  @param nics: list of nics to convert to hooks tuples
1106

1107
  """
1108
  hooks_nics = []
1109
  cluster = lu.cfg.GetClusterInfo()
1110
  for nic in nics:
1111
    ip = nic.ip
1112
    mac = nic.mac
1113
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1114
    mode = filled_params[constants.NIC_MODE]
1115
    link = filled_params[constants.NIC_LINK]
1116
    hooks_nics.append((ip, mac, mode, link))
1117
  return hooks_nics
1118

    
1119

    
1120
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1121
  """Builds instance related env variables for hooks from an object.
1122

1123
  @type lu: L{LogicalUnit}
1124
  @param lu: the logical unit on whose behalf we execute
1125
  @type instance: L{objects.Instance}
1126
  @param instance: the instance for which we should build the
1127
      environment
1128
  @type override: dict
1129
  @param override: dictionary with key/values that will override
1130
      our values
1131
  @rtype: dict
1132
  @return: the hook environment dictionary
1133

1134
  """
1135
  cluster = lu.cfg.GetClusterInfo()
1136
  bep = cluster.FillBE(instance)
1137
  hvp = cluster.FillHV(instance)
1138
  args = {
1139
    "name": instance.name,
1140
    "primary_node": instance.primary_node,
1141
    "secondary_nodes": instance.secondary_nodes,
1142
    "os_type": instance.os,
1143
    "status": instance.admin_state,
1144
    "maxmem": bep[constants.BE_MAXMEM],
1145
    "minmem": bep[constants.BE_MINMEM],
1146
    "vcpus": bep[constants.BE_VCPUS],
1147
    "nics": _NICListToTuple(lu, instance.nics),
1148
    "disk_template": instance.disk_template,
1149
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1150
    "bep": bep,
1151
    "hvp": hvp,
1152
    "hypervisor_name": instance.hypervisor,
1153
    "tags": instance.tags,
1154
  }
1155
  if override:
1156
    args.update(override)
1157
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1158

    
1159

    
1160
def _AdjustCandidatePool(lu, exceptions):
1161
  """Adjust the candidate pool after node operations.
1162

1163
  """
1164
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1165
  if mod_list:
1166
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1167
               utils.CommaJoin(node.name for node in mod_list))
1168
    for name in mod_list:
1169
      lu.context.ReaddNode(name)
1170
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1171
  if mc_now > mc_max:
1172
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1173
               (mc_now, mc_max))
1174

    
1175

    
1176
def _DecideSelfPromotion(lu, exceptions=None):
1177
  """Decide whether I should promote myself as a master candidate.
1178

1179
  """
1180
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1181
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1182
  # the new node will increase mc_max with one, so:
1183
  mc_should = min(mc_should + 1, cp_size)
1184
  return mc_now < mc_should
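
# Worked example: with candidate_pool_size = 10, mc_now = 3 and mc_should = 3
# before counting the new node, mc_should becomes min(3 + 1, 10) = 4, so
# 3 < 4 and the node promotes itself. Once mc_should already equals the pool
# size, the min() caps it and no further promotion happens.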
1185

    
1186

    
1187
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1188
  """Check that the brigdes needed by a list of nics exist.
1189

1190
  """
1191
  cluster = lu.cfg.GetClusterInfo()
1192
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1193
  brlist = [params[constants.NIC_LINK] for params in paramslist
1194
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1195
  if brlist:
1196
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1197
    result.Raise("Error checking bridges on destination node '%s'" %
1198
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1199

    
1200

    
1201
def _CheckInstanceBridgesExist(lu, instance, node=None):
1202
  """Check that the brigdes needed by an instance exist.
1203

1204
  """
1205
  if node is None:
1206
    node = instance.primary_node
1207
  _CheckNicsBridgesExist(lu, instance.nics, node)
1208

    
1209

    
1210
def _CheckOSVariant(os_obj, name):
1211
  """Check whether an OS name conforms to the os variants specification.
1212

1213
  @type os_obj: L{objects.OS}
1214
  @param os_obj: OS object to check
1215
  @type name: string
1216
  @param name: OS name passed by the user, to check for validity
1217

1218
  """
1219
  variant = objects.OS.GetVariant(name)
1220
  if not os_obj.supported_variants:
1221
    if variant:
1222
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1223
                                 " passed)" % (os_obj.name, variant),
1224
                                 errors.ECODE_INVAL)
1225
    return
1226
  if not variant:
1227
    raise errors.OpPrereqError("OS name must include a variant",
1228
                               errors.ECODE_INVAL)
1229

    
1230
  if variant not in os_obj.supported_variants:
1231
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1232

    
1233

    
1234
def _GetNodeInstancesInner(cfg, fn):
1235
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1236

    
1237

    
1238
def _GetNodeInstances(cfg, node_name):
1239
  """Returns a list of all primary and secondary instances on a node.
1240

1241
  """
1242

    
1243
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1244

    
1245

    
1246
def _GetNodePrimaryInstances(cfg, node_name):
1247
  """Returns primary instances on a node.
1248

1249
  """
1250
  return _GetNodeInstancesInner(cfg,
1251
                                lambda inst: node_name == inst.primary_node)
1252

    
1253

    
1254
def _GetNodeSecondaryInstances(cfg, node_name):
1255
  """Returns secondary instances on a node.
1256

1257
  """
1258
  return _GetNodeInstancesInner(cfg,
1259
                                lambda inst: node_name in inst.secondary_nodes)
1260

    
1261

    
1262
def _GetStorageTypeArgs(cfg, storage_type):
1263
  """Returns the arguments for a storage type.
1264

1265
  """
1266
  # Special case for file storage
1267
  if storage_type == constants.ST_FILE:
1268
    # storage.FileStorage wants a list of storage directories
1269
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1270

    
1271
  return []
1272

    
1273

    
1274
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1275
  faulty = []
1276

    
1277
  for dev in instance.disks:
1278
    cfg.SetDiskID(dev, node_name)
1279

    
1280
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1281
  result.Raise("Failed to get disk status from node %s" % node_name,
1282
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1283

    
1284
  for idx, bdev_status in enumerate(result.payload):
1285
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1286
      faulty.append(idx)
1287

    
1288
  return faulty
1289

    
1290

    
1291
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1292
  """Check the sanity of iallocator and node arguments and use the
1293
  cluster-wide iallocator if appropriate.
1294

1295
  Check that at most one of (iallocator, node) is specified. If none is
1296
  specified, then the LU's opcode's iallocator slot is filled with the
1297
  cluster-wide default iallocator.
1298

1299
  @type iallocator_slot: string
1300
  @param iallocator_slot: the name of the opcode iallocator slot
1301
  @type node_slot: string
1302
  @param node_slot: the name of the opcode target node slot
1303

1304
  """
1305
  node = getattr(lu.op, node_slot, None)
1306
  iallocator = getattr(lu.op, iallocator_slot, None)
1307

    
1308
  if node is not None and iallocator is not None:
1309
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1310
                               errors.ECODE_INVAL)
1311
  elif node is None and iallocator is None:
1312
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1313
    if default_iallocator:
1314
      setattr(lu.op, iallocator_slot, default_iallocator)
1315
    else:
1316
      raise errors.OpPrereqError("No iallocator or node given and no"
1317
                                 " cluster-wide default iallocator found;"
1318
                                 " please specify either an iallocator or a"
1319
                                 " node, or set a cluster-wide default"
1320
                                 " iallocator")
1321

    
1322

    
1323
def _GetDefaultIAllocator(cfg, iallocator):
1324
  """Decides on which iallocator to use.
1325

1326
  @type cfg: L{config.ConfigWriter}
1327
  @param cfg: Cluster configuration object
1328
  @type iallocator: string or None
1329
  @param iallocator: Iallocator specified in opcode
1330
  @rtype: string
1331
  @return: Iallocator name
1332

1333
  """
1334
  if not iallocator:
1335
    # Use default iallocator
1336
    iallocator = cfg.GetDefaultIAllocator()
1337

    
1338
  if not iallocator:
1339
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1340
                               " opcode nor as a cluster-wide default",
1341
                               errors.ECODE_INVAL)
1342

    
1343
  return iallocator
1344

    
1345

    
1346
class LUClusterPostInit(LogicalUnit):
1347
  """Logical unit for running hooks after cluster initialization.
1348

1349
  """
1350
  HPATH = "cluster-init"
1351
  HTYPE = constants.HTYPE_CLUSTER
1352

    
1353
  def BuildHooksEnv(self):
1354
    """Build hooks env.
1355

1356
    """
1357
    return {
1358
      "OP_TARGET": self.cfg.GetClusterName(),
1359
      }
1360

    
1361
  def BuildHooksNodes(self):
1362
    """Build hooks nodes.
1363

1364
    """
1365
    return ([], [self.cfg.GetMasterNode()])
1366

    
1367
  def Exec(self, feedback_fn):
1368
    """Nothing to do.
1369

1370
    """
1371
    return True
1372

    
1373

    
1374
class LUClusterDestroy(LogicalUnit):
1375
  """Logical unit for destroying the cluster.
1376

1377
  """
1378
  HPATH = "cluster-destroy"
1379
  HTYPE = constants.HTYPE_CLUSTER
1380

    
1381
  def BuildHooksEnv(self):
1382
    """Build hooks env.
1383

1384
    """
1385
    return {
1386
      "OP_TARGET": self.cfg.GetClusterName(),
1387
      }
1388

    
1389
  def BuildHooksNodes(self):
1390
    """Build hooks nodes.
1391

1392
    """
1393
    return ([], [])
1394

    
1395
  def CheckPrereq(self):
1396
    """Check prerequisites.
1397

1398
    This checks whether the cluster is empty.
1399

1400
    Any errors are signaled by raising errors.OpPrereqError.
1401

1402
    """
1403
    master = self.cfg.GetMasterNode()
1404

    
1405
    nodelist = self.cfg.GetNodeList()
1406
    if len(nodelist) != 1 or nodelist[0] != master:
1407
      raise errors.OpPrereqError("There are still %d node(s) in"
1408
                                 " this cluster." % (len(nodelist) - 1),
1409
                                 errors.ECODE_INVAL)
1410
    instancelist = self.cfg.GetInstanceList()
1411
    if instancelist:
1412
      raise errors.OpPrereqError("There are still %d instance(s) in"
1413
                                 " this cluster." % len(instancelist),
1414
                                 errors.ECODE_INVAL)
1415

    
1416
  def Exec(self, feedback_fn):
1417
    """Destroys the cluster.
1418

1419
    """
1420
    master_params = self.cfg.GetMasterNetworkParameters()
1421

    
1422
    # Run post hooks on master node before it's removed
1423
    _RunPostHook(self, master_params.name)
1424

    
1425
    ems = self.cfg.GetUseExternalMipScript()
1426
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1427
                                                     master_params, ems)
1428
    result.Raise("Could not disable the master role")
1429

    
1430
    return master_params.name
1431

    
1432

    
1433
def _VerifyCertificate(filename):
1434
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1435

1436
  @type filename: string
1437
  @param filename: Path to PEM file
1438

1439
  """
1440
  try:
1441
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1442
                                           utils.ReadFile(filename))
1443
  except Exception, err: # pylint: disable=W0703
1444
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1445
            "Failed to load X509 certificate %s: %s" % (filename, err))
1446

    
1447
  (errcode, msg) = \
1448
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1449
                                constants.SSL_CERT_EXPIRATION_ERROR)
1450

    
1451
  if msg:
1452
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1453
  else:
1454
    fnamemsg = None
1455

    
1456
  if errcode is None:
1457
    return (None, fnamemsg)
1458
  elif errcode == utils.CERT_WARNING:
1459
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1460
  elif errcode == utils.CERT_ERROR:
1461
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1462

    
1463
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1464

    
1465

    
1466
def _GetAllHypervisorParameters(cluster, instances):
1467
  """Compute the set of all hypervisor parameters.
1468

1469
  @type cluster: L{objects.Cluster}
1470
  @param cluster: the cluster object
1471
  @param instances: list of L{objects.Instance}
1472
  @param instances: additional instances from which to obtain parameters
1473
  @rtype: list of (origin, hypervisor, parameters)
1474
  @return: a list with all parameters found, indicating the hypervisor they
1475
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1476

1477
  """
1478
  hvp_data = []
1479

    
1480
  for hv_name in cluster.enabled_hypervisors:
1481
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1482

    
1483
  for os_name, os_hvp in cluster.os_hvp.items():
1484
    for hv_name, hv_params in os_hvp.items():
1485
      if hv_params:
1486
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1487
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1488

    
1489
  # TODO: collapse identical parameter values in a single one
1490
  for instance in instances:
1491
    if instance.hvparams:
1492
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1493
                       cluster.FillHV(instance)))
1494

    
1495
  return hvp_data
1496

    
1497

    
1498
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


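# LUClusterVerify only submits jobs: one OpClusterVerifyConfig job for the
# global configuration (unless a single group was requested) and one
# OpClusterVerifyGroup job per node group.  The relative dependency
# [(-len(jobs), [])] computed in Exec always points back to that first
# config-verification job, so each group verification runs only after the
# global check has finished.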
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (hv_name, item))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

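  # Locking strategy: ExpandNames acquires the instance locks of the group and
  # an empty node-lock set; DeclareLocks then adds the group's member nodes
  # plus, for internally mirrored (DRBD) instances, any secondary nodes living
  # in other groups, so that their LVs can be verified as well.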
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes should be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

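  # Note: nresult["version"] is expected to be a (protocol, release) pair; a
  # protocol mismatch is a hard error and aborts further checks for the node,
  # while a differing release version is only reported as a warning.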
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

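  # The node clock is accepted if it falls inside the window from
  # (nvinfo_starttime - NODE_MAX_CLOCK_SKEW) to
  # (nvinfo_endtime + NODE_MAX_CLOCK_SKEW), which accounts for the RPC
  # round-trip time.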
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

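  # diskstatus, as built by _CollectDiskInfo, maps node name to a list of
  # (success, payload) pairs, one per disk index; _VerifyInstance flattens it
  # into (node, success, status, idx) tuples before checking for faulty or
  # unreadable disks.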
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      #TODO(dynmem): use MINMEM for checking
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MAXMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

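  # File checks: for every ancillary file the set of nodes that should have it
  # is derived from (files_all, files_mc, files_vm); collected checksums are
  # then compared across those nodes.  Optional files must be present either
  # on all expected nodes or on none of them.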
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

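  # DRBD checks compare two views of the minors on a node: the allocation in
  # the configuration (drbd_map[node]: minor -> instance) and the minors the
  # node reports as in use, flagging both inactive-but-configured and
  # in-use-but-unallocated minors.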
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

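  # Disk status is gathered with one batched call_blockdev_getmirrorstatus_multi
  # RPC covering all nodes that have disks and is then re-keyed into
  # instdisk[instance][node] = [(success, payload), ...].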
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

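  # _SshNodeSelector returns one endless (cycled) iterator of node names per
  # foreign node group; _SelectSshCheckNodes draws from them so that every
  # online node in this group is assigned one SSH check target from each of
  # the other groups.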
  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; their failure causes
    their output to be logged in the verify output and makes the verification
    fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

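  # Exec drives the whole group verification: it builds node_verify_param,
  # issues a call_node_verify RPC covering all nodes of the group, collects
  # disk and DRBD information, and then runs the per-node and per-instance
  # checks defined above.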
  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }

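    # LVM and DRBD data are only requested from the nodes when a volume group
    # or a DRBD usermode helper is actually configured for the cluster.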
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

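    # The instdisk structure built below maps
    # instance -> node -> [(success, payload), ...] as described in
    # _CollectDiskInfo and is used by the per-instance disk checks.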
    feedback_fn("* Gathering disk information (%s nodes)" %
2850
                len(self.my_node_names))
2851
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2852
                                     self.my_inst_info)
2853

    
2854
    feedback_fn("* Verifying configuration file consistency")
2855

    
2856
    # If not all nodes are being checked, we need to make sure the master node
2857
    # and a non-checked vm_capable node are in the list.
2858
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2859
    if absent_nodes:
2860
      vf_nvinfo = all_nvinfo.copy()
2861
      vf_node_info = list(self.my_node_info.values())
2862
      additional_nodes = []
2863
      if master_node not in self.my_node_info:
2864
        additional_nodes.append(master_node)
2865
        vf_node_info.append(self.all_node_info[master_node])
2866
      # Add the first vm_capable node we find which is not included
2867
      for node in absent_nodes:
2868
        nodeinfo = self.all_node_info[node]
2869
        if nodeinfo.vm_capable and not nodeinfo.offline:
2870
          additional_nodes.append(node)
2871
          vf_node_info.append(self.all_node_info[node])
2872
          break
2873
      key = constants.NV_FILELIST
2874
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2875
                                                 {key: node_verify_param[key]},
2876
                                                 self.cfg.GetClusterName()))
2877
    else:
2878
      vf_nvinfo = all_nvinfo
2879
      vf_node_info = self.my_node_info.values()
2880

    
2881
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2882

    
2883
    feedback_fn("* Verifying node status")
2884

    
2885
    refos_img = None
2886

    
2887
    for node_i in node_data_list:
2888
      node = node_i.name
2889
      nimg = node_image[node]
2890

    
2891
      if node_i.offline:
2892
        if verbose:
2893
          feedback_fn("* Skipping offline node %s" % (node,))
2894
        n_offline += 1
2895
        continue
2896

    
2897
      if node == master_node:
2898
        ntype = "master"
2899
      elif node_i.master_candidate:
2900
        ntype = "master candidate"
2901
      elif node_i.drained:
2902
        ntype = "drained"
2903
        n_drained += 1
2904
      else:
2905
        ntype = "regular"
2906
      if verbose:
2907
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2908

    
2909
      msg = all_nvinfo[node].fail_msg
2910
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2911
               msg)
2912
      if msg:
2913
        nimg.rpc_fail = True
2914
        continue
2915

    
2916
      nresult = all_nvinfo[node].payload
2917

    
2918
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2919
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2920
      self._VerifyNodeNetwork(node_i, nresult)
2921
      self._VerifyNodeUserScripts(node_i, nresult)
2922
      self._VerifyOob(node_i, nresult)
2923

    
2924
      if nimg.vm_capable:
2925
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2926
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2927
                             all_drbd_map)
2928

    
2929
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2930
        self._UpdateNodeInstances(node_i, nresult, nimg)
2931
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2932
        self._UpdateNodeOS(node_i, nresult, nimg)
2933

    
2934
        if not nimg.os_fail:
2935
          if refos_img is None:
2936
            refos_img = nimg
2937
          self._VerifyNodeOS(node_i, nimg, refos_img)
2938
        self._VerifyNodeBridges(node_i, nresult, bridges)
2939

    
2940
        # Check whether all running instances are primary for the node. (This
2941
        # can no longer be done from _VerifyInstance below, since some of the
2942
        # wrong instances could be from other node groups.)
2943
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2944

    
2945
        for inst in non_primary_inst:
2946
          # FIXME: investigate best way to handle offline insts
2947
          if (inst in self.all_inst_info and
              self.all_inst_info[inst].admin_state ==
              constants.ADMINST_OFFLINE):
2948
            if verbose:
2949
              feedback_fn("* Skipping offline instance %s" % inst.name)
2950
            i_offline += 1
2951
            continue
2952
          test = inst in self.all_inst_info
2953
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2954
                   "instance should not run on node %s", node_i.name)
2955
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2956
                   "node is running unknown instance %s", inst)
2957

    
2958
    for node, result in extra_lv_nvinfo.items():
2959
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2960
                              node_image[node], vg_name)
2961

    
2962
    feedback_fn("* Verifying instance status")
2963
    for instance in self.my_inst_names:
2964
      if verbose:
2965
        feedback_fn("* Verifying instance %s" % instance)
2966
      inst_config = self.my_inst_info[instance]
2967
      self._VerifyInstance(instance, inst_config, node_image,
2968
                           instdisk[instance])
2969
      inst_nodes_offline = []
2970

    
2971
      pnode = inst_config.primary_node
2972
      pnode_img = node_image[pnode]
2973
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2974
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2975
               " primary node failed", instance)
2976

    
2977
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2978
               pnode_img.offline,
2979
               constants.CV_EINSTANCEBADNODE, instance,
2980
               "instance is marked as running and lives on offline node %s",
2981
               inst_config.primary_node)
2982

    
2983
      # If the instance is non-redundant we cannot survive losing its primary
2984
      # node, so we are not N+1 compliant. On the other hand we have no disk
2985
      # templates with more than one secondary, so that situation is not well
2986
      # supported either.
2987
      # FIXME: does not support file-backed instances
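      # Example: a plain (LVM-only) instance has no secondary nodes at all,
      # so it ends up in i_non_redundant and is merely reported as a NOTICE
      # in the summary at the end of this method.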
2988
      if not inst_config.secondary_nodes:
2989
        i_non_redundant.append(instance)
2990

    
2991
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2992
               constants.CV_EINSTANCELAYOUT,
2993
               instance, "instance has multiple secondary nodes: %s",
2994
               utils.CommaJoin(inst_config.secondary_nodes),
2995
               code=self.ETYPE_WARNING)
2996

    
2997
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2998
        pnode = inst_config.primary_node
2999
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
3000
        instance_groups = {}
3001

    
3002
        for node in instance_nodes:
3003
          instance_groups.setdefault(self.all_node_info[node].group,
3004
                                     []).append(node)
3005

    
3006
        pretty_list = [
3007
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
3008
          # Sort so that we always list the primary node first.
3009
          for group, nodes in sorted(instance_groups.items(),
3010
                                     key=lambda (_, nodes): pnode in nodes,
3011
                                     reverse=True)]
3012

    
3013
        self._ErrorIf(len(instance_groups) > 1,
3014
                      constants.CV_EINSTANCESPLITGROUPS,
3015
                      instance, "instance has primary and secondary nodes in"
3016
                      " different groups: %s", utils.CommaJoin(pretty_list),
3017
                      code=self.ETYPE_WARNING)
3018

    
3019
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3020
        i_non_a_balanced.append(instance)
3021

    
3022
      for snode in inst_config.secondary_nodes:
3023
        s_img = node_image[snode]
3024
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3025
                 snode, "instance %s, connection to secondary node failed",
3026
                 instance)
3027

    
3028
        if s_img.offline:
3029
          inst_nodes_offline.append(snode)
3030

    
3031
      # warn that the instance lives on offline nodes
3032
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3033
               "instance has offline secondary node(s) %s",
3034
               utils.CommaJoin(inst_nodes_offline))
3035
      # ... or ghost/non-vm_capable nodes
3036
      for node in inst_config.all_nodes:
3037
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3038
                 instance, "instance lives on ghost node %s", node)
3039
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3040
                 instance, "instance lives on non-vm_capable node %s", node)
3041

    
3042
    feedback_fn("* Verifying orphan volumes")
3043
    reserved = utils.FieldSet(*cluster.reserved_lvs)
3044

    
3045
    # We will get spurious "unknown volume" warnings if any node of this group
3046
    # is secondary for an instance whose primary is in another group. To avoid
3047
    # them, we find these instances and add their volumes to node_vol_should.
3048
    for inst in self.all_inst_info.values():
3049
      for secondary in inst.secondary_nodes:
3050
        if (secondary in self.my_node_info
3051
            and inst.name not in self.my_inst_info):
3052
          inst.MapLVsByNode(node_vol_should)
3053
          break
3054

    
3055
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3056

    
3057
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3058
      feedback_fn("* Verifying N+1 Memory redundancy")
3059
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
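      # (N+1 redundancy here means that, for every node, the nodes holding
      # the secondaries of its instances must have enough free memory to
      # start those instances should the node fail.)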
3060

    
3061
    feedback_fn("* Other Notes")
3062
    if i_non_redundant:
3063
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3064
                  % len(i_non_redundant))
3065

    
3066
    if i_non_a_balanced:
3067
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3068
                  % len(i_non_a_balanced))
3069

    
3070
    if i_offline:
3071
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3072

    
3073
    if n_offline:
3074
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3075

    
3076
    if n_drained:
3077
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3078

    
3079
    return not self.bad
3080

    
3081
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3082
    """Analyze the post-hooks' result
3083

3084
    This method analyses the hook result, handles it, and sends some
3085
    nicely-formatted feedback back to the user.
3086

3087
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3088
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3089
    @param hooks_results: the results of the multi-node hooks rpc call
3090
    @param feedback_fn: function used to send feedback back to the caller
3091
    @param lu_result: previous Exec result
3092
    @return: the new Exec result, based on the previous result
3093
        and hook results
3094

3095
    """
3096
    # We only really run POST phase hooks, only for non-empty groups,
3097
    # and are only interested in their results
3098
    if not self.my_node_names:
3099
      # empty node group
3100
      pass
3101
    elif phase == constants.HOOKS_PHASE_POST:
3102
      # Used to change hooks' output to proper indentation
3103
      feedback_fn("* Hooks Results")
3104
      assert hooks_results, "invalid result from hooks"
3105

    
3106
      for node_name in hooks_results:
3107
        res = hooks_results[node_name]
3108
        msg = res.fail_msg
3109
        test = msg and not res.offline
3110
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3111
                      "Communication failure in hooks execution: %s", msg)
3112
        if res.offline or msg:
3113
          # No need to investigate payload if node is offline or gave
3114
          # an error.
3115
          continue
3116
        for script, hkr, output in res.payload:
3117
          test = hkr == constants.HKR_FAIL
3118
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3119
                        "Script %s failed, output:", script)
3120
          if test:
3121
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3122
            feedback_fn("%s" % output)
3123
            lu_result = False
3124

    
3125
    return lu_result
3126

    
3127

    
3128
class LUClusterVerifyDisks(NoHooksLU):
3129
  """Verifies the cluster disks status.
3130

3131
  """
3132
  REQ_BGL = False
3133

    
3134
  def ExpandNames(self):
3135
    self.share_locks = _ShareAll()
3136
    self.needed_locks = {
3137
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3138
      }
3139

    
3140
  def Exec(self, feedback_fn):
3141
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3142

    
3143
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3144
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3145
                           for group in group_names])
3146

    
3147

    
3148
class LUGroupVerifyDisks(NoHooksLU):
3149
  """Verifies the status of all disks in a node group.
3150

3151
  """
3152
  REQ_BGL = False
3153

    
3154
  def ExpandNames(self):
3155
    # Raises errors.OpPrereqError on its own if group can't be found
3156
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3157

    
3158
    self.share_locks = _ShareAll()
3159
    self.needed_locks = {
3160
      locking.LEVEL_INSTANCE: [],
3161
      locking.LEVEL_NODEGROUP: [],
3162
      locking.LEVEL_NODE: [],
3163
      }
3164

    
3165
  def DeclareLocks(self, level):
3166
    if level == locking.LEVEL_INSTANCE:
3167
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3168

    
3169
      # Lock instances optimistically, needs verification once node and group
3170
      # locks have been acquired
3171
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3172
        self.cfg.GetNodeGroupInstances(self.group_uuid)
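      # (The optimistic assumption is re-checked in CheckPrereq through
      # _CheckNodeGroupInstances once all locks are actually held.)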
3173

    
3174
    elif level == locking.LEVEL_NODEGROUP:
3175
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3176

    
3177
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3178
        set([self.group_uuid] +
3179
            # Lock all groups used by instances optimistically; this requires
3180
            # going via the node before it's locked, requiring verification
3181
            # later on
3182
            [group_uuid
3183
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3184
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3185

    
3186
    elif level == locking.LEVEL_NODE:
3187
      # This will only lock the nodes in the group to be verified which contain
3188
      # actual instances
3189
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3190
      self._LockInstancesNodes()
3191

    
3192
      # Lock all nodes in group to be verified
3193
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3194
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3195
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3196

    
3197
  def CheckPrereq(self):
3198
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3199
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3200
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3201

    
3202
    assert self.group_uuid in owned_groups
3203

    
3204
    # Check if locked instances are still correct
3205
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3206

    
3207
    # Get instance information
3208
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3209

    
3210
    # Check if node groups for locked instances are still correct
3211
    for (instance_name, inst) in self.instances.items():
3212
      assert owned_nodes.issuperset(inst.all_nodes), \
3213
        "Instance %s's nodes changed while we kept the lock" % instance_name
3214

    
3215
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3216
                                             owned_groups)
3217

    
3218
      assert self.group_uuid in inst_groups, \
3219
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3220

    
3221
  def Exec(self, feedback_fn):
3222
    """Verify integrity of cluster disks.
3223

3224
    @rtype: tuple of three items
3225
    @return: a tuple of (dict of node-to-node_error, list of instances
3226
        which need activate-disks, dict of instance: (node, volume) for
3227
        missing volumes)
3228

3229
    """
3230
    res_nodes = {}
3231
    res_instances = set()
3232
    res_missing = {}
3233

    
3234
    nv_dict = _MapInstanceDisksToNodes([inst
3235
            for inst in self.instances.values()
3236
            if inst.admin_state == constants.ADMINST_UP])
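    # nv_dict maps (node name, LV name) to the owning instance for every disk
    # of every running instance we hold a lock on; entries are popped as soon
    # as a node reports the LV, so whatever is left over is a missing volume.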
3237

    
3238
    if nv_dict:
3239
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3240
                             set(self.cfg.GetVmCapableNodeList()))
3241

    
3242
      node_lvs = self.rpc.call_lv_list(nodes, [])
3243

    
3244
      for (node, node_res) in node_lvs.items():
3245
        if node_res.offline:
3246
          continue
3247

    
3248
        msg = node_res.fail_msg
3249
        if msg:
3250
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3251
          res_nodes[node] = msg
3252
          continue
3253

    
3254
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3255
          inst = nv_dict.pop((node, lv_name), None)
3256
          if not (lv_online or inst is None):
3257
            res_instances.add(inst)
3258

    
3259
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3260
      # better
3261
      for key, inst in nv_dict.iteritems():
3262
        res_missing.setdefault(inst, []).append(list(key))
3263

    
3264
    return (res_nodes, list(res_instances), res_missing)
3265

    
3266

    
3267
class LUClusterRepairDiskSizes(NoHooksLU):
3268
  """Verifies the cluster disks sizes.
3269

3270
  """
3271
  REQ_BGL = False
3272

    
3273
  def ExpandNames(self):
3274
    if self.op.instances:
3275
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3276
      self.needed_locks = {
3277
        locking.LEVEL_NODE_RES: [],
3278
        locking.LEVEL_INSTANCE: self.wanted_names,
3279
        }
3280
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3281
    else:
3282
      self.wanted_names = None
3283
      self.needed_locks = {
3284
        locking.LEVEL_NODE_RES: locking.ALL_SET,
3285
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3286
        }
3287
    self.share_locks = {
3288
      locking.LEVEL_NODE_RES: 1,
3289
      locking.LEVEL_INSTANCE: 0,
3290
      }
3291

    
3292
  def DeclareLocks(self, level):
3293
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3294
      self._LockInstancesNodes(primary_only=True, level=level)
3295

    
3296
  def CheckPrereq(self):
3297
    """Check prerequisites.
3298

3299
    This only checks the optional instance list against the existing names.
3300

3301
    """
3302
    if self.wanted_names is None:
3303
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3304

    
3305
    self.wanted_instances = \
3306
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3307

    
3308
  def _EnsureChildSizes(self, disk):
3309
    """Ensure children of the disk have the needed disk size.
3310

3311
    This is valid mainly for DRBD8 and fixes an issue where the
3312
    children have smaller disk size.
3313

3314
    @param disk: an L{ganeti.objects.Disk} object
3315

3316
    """
3317
    if disk.dev_type == constants.LD_DRBD8:
3318
      assert disk.children, "Empty children for DRBD8?"
3319
      fchild = disk.children[0]
3320
      mismatch = fchild.size < disk.size
3321
      if mismatch:
3322
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3323
                     fchild.size, disk.size)
3324
        fchild.size = disk.size
3325

    
3326
      # and we recurse on this child only, not on the metadev
3327
      return self._EnsureChildSizes(fchild) or mismatch
3328
    else:
3329
      return False
3330

    
3331
  def Exec(self, feedback_fn):
3332
    """Verify the size of cluster disks.
3333

3334
    """
3335
    # TODO: check child disks too
3336
    # TODO: check differences in size between primary/secondary nodes
3337
    per_node_disks = {}
3338
    for instance in self.wanted_instances:
3339
      pnode = instance.primary_node
3340
      if pnode not in per_node_disks:
3341
        per_node_disks[pnode] = []
3342
      for idx, disk in enumerate(instance.disks):
3343
        per_node_disks[pnode].append((instance, idx, disk))
3344

    
3345
    assert not (frozenset(per_node_disks.keys()) -
3346
                self.owned_locks(locking.LEVEL_NODE_RES)), \
3347
      "Not owning correct locks"
3348
    assert not self.owned_locks(locking.LEVEL_NODE)
3349

    
3350
    changed = []
3351
    for node, dskl in per_node_disks.items():
3352
      newl = [v[2].Copy() for v in dskl]
3353
      for dsk in newl:
3354
        self.cfg.SetDiskID(dsk, node)
3355
      result = self.rpc.call_blockdev_getsize(node, newl)
3356
      if result.fail_msg:
3357
        self.LogWarning("Failure in blockdev_getsize call to node"
3358
                        " %s, ignoring", node)
3359
        continue
3360
      if len(result.payload) != len(dskl):
3361
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3362
                        " result.payload=%s", node, len(dskl), result.payload)
3363
        self.LogWarning("Invalid result from node %s, ignoring node results",
3364
                        node)
3365
        continue
3366
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3367
        if size is None:
3368
          self.LogWarning("Disk %d of instance %s did not return size"
3369
                          " information, ignoring", idx, instance.name)
3370
          continue
3371
        if not isinstance(size, (int, long)):
3372
          self.LogWarning("Disk %d of instance %s did not return valid"
3373
                          " size information, ignoring", idx, instance.name)
3374
          continue
3375
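        # call_blockdev_getsize reports sizes in bytes; shift down to MiB so
        # the value is comparable with the configuration's disk.size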
        size = size >> 20
3376
        if size != disk.size:
3377
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3378
                       " correcting: recorded %d, actual %d", idx,
3379
                       instance.name, disk.size, size)
3380
          disk.size = size
3381
          self.cfg.Update(instance, feedback_fn)
3382
          changed.append((instance.name, idx, size))
3383
        if self._EnsureChildSizes(disk):
3384
          self.cfg.Update(instance, feedback_fn)
3385
          changed.append((instance.name, idx, disk.size))
3386
    return changed
3387

    
3388

    
3389
class LUClusterRename(LogicalUnit):
3390
  """Rename the cluster.
3391

3392
  """
3393
  HPATH = "cluster-rename"
3394
  HTYPE = constants.HTYPE_CLUSTER
3395

    
3396
  def BuildHooksEnv(self):
3397
    """Build hooks env.
3398

3399
    """
3400
    return {
3401
      "OP_TARGET": self.cfg.GetClusterName(),
3402
      "NEW_NAME": self.op.name,
3403
      }
3404

    
3405
  def BuildHooksNodes(self):
3406
    """Build hooks nodes.
3407

3408
    """
3409
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3410

    
3411
  def CheckPrereq(self):
3412
    """Verify that the passed name is a valid one.
3413

3414
    """
3415
    hostname = netutils.GetHostname(name=self.op.name,
3416
                                    family=self.cfg.GetPrimaryIPFamily())
3417

    
3418
    new_name = hostname.name
3419
    self.ip = new_ip = hostname.ip
3420
    old_name = self.cfg.GetClusterName()
3421
    old_ip = self.cfg.GetMasterIP()
3422
    if new_name == old_name and new_ip == old_ip:
3423
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3424
                                 " cluster has changed",
3425
                                 errors.ECODE_INVAL)
3426
    if new_ip != old_ip:
3427
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3428
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3429
                                   " reachable on the network" %
3430
                                   new_ip, errors.ECODE_NOTUNIQUE)
3431

    
3432
    self.op.name = new_name
3433

    
3434
  def Exec(self, feedback_fn):
3435
    """Rename the cluster.
3436

3437
    """
3438
    clustername = self.op.name
3439
    new_ip = self.ip
3440

    
3441
    # shutdown the master IP
3442
    master_params = self.cfg.GetMasterNetworkParameters()
3443
    ems = self.cfg.GetUseExternalMipScript()
3444
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3445
                                                     master_params, ems)
3446
    result.Raise("Could not disable the master role")
3447

    
3448
    try:
3449
      cluster = self.cfg.GetClusterInfo()
3450
      cluster.cluster_name = clustername
3451
      cluster.master_ip = new_ip
3452
      self.cfg.Update(cluster, feedback_fn)
3453

    
3454
      # update the known hosts file
3455
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3456
      node_list = self.cfg.GetOnlineNodeList()
3457
      try:
3458
        node_list.remove(master_params.name)
3459
      except ValueError:
3460
        pass
3461
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3462
    finally:
3463
      master_params.ip = new_ip
3464
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3465
                                                     master_params, ems)
3466
      msg = result.fail_msg
3467
      if msg:
3468
        self.LogWarning("Could not re-enable the master role on"
3469
                        " the master, please restart manually: %s", msg)
3470

    
3471
    return clustername
3472

    
3473

    
3474
def _ValidateNetmask(cfg, netmask):
3475
  """Checks if a netmask is valid.
3476

3477
  @type cfg: L{config.ConfigWriter}
3478
  @param cfg: The cluster configuration
3479
  @type netmask: int
3480
  @param netmask: the netmask to be verified
3481
  @raise errors.OpPrereqError: if the validation fails
3482

3483
  """
3484
  ip_family = cfg.GetPrimaryIPFamily()
3485
  try:
3486
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3487
  except errors.ProgrammerError:
3488
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
3489
                               ip_family)
3490
  if not ipcls.ValidateNetmask(netmask):
3491
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3492
                                (netmask))
3493

    
3494

    
3495
class LUClusterSetParams(LogicalUnit):
3496
  """Change the parameters of the cluster.
3497

3498
  """
3499
  HPATH = "cluster-modify"
3500
  HTYPE = constants.HTYPE_CLUSTER
3501
  REQ_BGL = False
3502

    
3503
  def CheckArguments(self):
3504
    """Check parameters
3505

3506
    """
3507
    if self.op.uid_pool:
3508
      uidpool.CheckUidPool(self.op.uid_pool)
3509

    
3510
    if self.op.add_uids:
3511
      uidpool.CheckUidPool(self.op.add_uids)
3512

    
3513
    if self.op.remove_uids:
3514
      uidpool.CheckUidPool(self.op.remove_uids)
3515

    
3516
    if self.op.master_netmask is not None:
3517
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3518

    
3519
    if self.op.diskparams:
3520
      for dt_params in self.op.diskparams.values():
3521
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)
3522

    
3523
  def ExpandNames(self):
3524
    # FIXME: in the future maybe other cluster params won't require checking on
3525
    # all nodes to be modified.
3526
    self.needed_locks = {
3527
      locking.LEVEL_NODE: locking.ALL_SET,
3528
    }
3529
    self.share_locks[locking.LEVEL_NODE] = 1
3530

    
3531
  def BuildHooksEnv(self):
3532
    """Build hooks env.
3533

3534
    """
3535
    return {
3536
      "OP_TARGET": self.cfg.GetClusterName(),
3537
      "NEW_VG_NAME": self.op.vg_name,
3538
      }
3539

    
3540
  def BuildHooksNodes(self):
3541
    """Build hooks nodes.
3542

3543
    """
3544
    mn = self.cfg.GetMasterNode()
3545
    return ([mn], [mn])
3546

    
3547
  def CheckPrereq(self):
3548
    """Check prerequisites.
3549

3550
    This checks whether the given params don't conflict and
3551
    if the given volume group is valid.
3552

3553
    """
3554
    if self.op.vg_name is not None and not self.op.vg_name:
3555
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3556
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3557
                                   " instances exist", errors.ECODE_INVAL)
3558

    
3559
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3560
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3561
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3562
                                   " drbd-based instances exist",
3563
                                   errors.ECODE_INVAL)
3564

    
3565
    node_list = self.owned_locks(locking.LEVEL_NODE)
3566

    
3567
    # if vg_name not None, checks given volume group on all nodes
3568
    if self.op.vg_name:
3569
      vglist = self.rpc.call_vg_list(node_list)
3570
      for node in node_list:
3571
        msg = vglist[node].fail_msg
3572
        if msg:
3573
          # ignoring down node
3574
          self.LogWarning("Error while gathering data on node %s"
3575
                          " (ignoring node): %s", node, msg)
3576
          continue
3577
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3578
                                              self.op.vg_name,
3579
                                              constants.MIN_VG_SIZE)
3580
        if vgstatus:
3581
          raise errors.OpPrereqError("Error on node '%s': %s" %
3582
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3583

    
3584
    if self.op.drbd_helper:
3585
      # checks given drbd helper on all nodes
3586
      helpers = self.rpc.call_drbd_helper(node_list)
3587
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3588
        if ninfo.offline:
3589
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3590
          continue
3591
        msg = helpers[node].fail_msg
3592
        if msg:
3593
          raise errors.OpPrereqError("Error checking drbd helper on node"
3594
                                     " '%s': %s" % (node, msg),
3595
                                     errors.ECODE_ENVIRON)
3596
        node_helper = helpers[node].payload
3597
        if node_helper != self.op.drbd_helper:
3598
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3599
                                     (node, node_helper), errors.ECODE_ENVIRON)
3600

    
3601
    self.cluster = cluster = self.cfg.GetClusterInfo()
3602
    # validate params changes
3603
    if self.op.beparams:
3604
      objects.UpgradeBeParams(self.op.beparams)
3605
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3606
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3607

    
3608
    if self.op.ndparams:
3609
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3610
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3611

    
3612
      # TODO: we need a more general way to handle resetting
3613
      # cluster-level parameters to default values
3614
      if self.new_ndparams["oob_program"] == "":
3615
        self.new_ndparams["oob_program"] = \
3616
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3617

    
3618
    if self.op.nicparams:
3619
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3620
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3621
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3622
      nic_errors = []
3623

    
3624
      # check all instances for consistency
3625
      for instance in self.cfg.GetAllInstancesInfo().values():
3626
        for nic_idx, nic in enumerate(instance.nics):
3627
          params_copy = copy.deepcopy(nic.nicparams)
3628
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3629

    
3630
          # check parameter syntax
3631
          try:
3632
            objects.NIC.CheckParameterSyntax(params_filled)
3633
          except errors.ConfigurationError, err:
3634
            nic_errors.append("Instance %s, nic/%d: %s" %
3635
                              (instance.name, nic_idx, err))
3636

    
3637
          # if we're moving instances to routed, check that they have an ip
3638
          target_mode = params_filled[constants.NIC_MODE]
3639
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3640
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3641
                              " address" % (instance.name, nic_idx))
3642
      if nic_errors:
3643
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3644
                                   "\n".join(nic_errors))
3645

    
3646
    # hypervisor list/parameters
3647
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3648
    if self.op.hvparams:
3649
      for hv_name, hv_dict in self.op.hvparams.items():
3650
        if hv_name not in self.new_hvparams:
3651
          self.new_hvparams[hv_name] = hv_dict
3652
        else:
3653
          self.new_hvparams[hv_name].update(hv_dict)
3654

    
3655
    # disk template parameters
3656
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
3657
    if self.op.diskparams:
3658
      for dt_name, dt_params in self.op.diskparams.items():
3659
        if dt_name not in self.new_diskparams:
3660
          self.new_diskparams[dt_name] = dt_params
3661
        else:
3662
          self.new_diskparams[dt_name].update(dt_params)
3663

    
3664
    # os hypervisor parameters
3665
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3666
    if self.op.os_hvp:
3667
      for os_name, hvs in self.op.os_hvp.items():
3668
        if os_name not in self.new_os_hvp:
3669
          self.new_os_hvp[os_name] = hvs
3670
        else:
3671
          for hv_name, hv_dict in hvs.items():
3672
            if hv_name not in self.new_os_hvp[os_name]:
3673
              self.new_os_hvp[os_name][hv_name] = hv_dict
3674
            else:
3675
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3676

    
3677
    # os parameters
3678
    self.new_osp = objects.FillDict(cluster.osparams, {})
3679
    if self.op.osparams:
3680
      for os_name, osp in self.op.osparams.items():
3681
        if os_name not in self.new_osp:
3682
          self.new_osp[os_name] = {}
3683

    
3684
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3685
                                                  use_none=True)
3686

    
3687
        if not self.new_osp[os_name]:
3688
          # we removed all parameters
3689
          del self.new_osp[os_name]
3690
        else:
3691
          # check the parameter validity (remote check)
3692
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3693
                         os_name, self.new_osp[os_name])
3694

    
3695
    # changes to the hypervisor list
3696
    if self.op.enabled_hypervisors is not None:
3697
      self.hv_list = self.op.enabled_hypervisors
3698
      for hv in self.hv_list:
3699
        # if the hypervisor doesn't already exist in the cluster
3700
        # hvparams, we initialize it to empty, and then (in both
3701
        # cases) we make sure to fill the defaults, as we might not
3702
        # have a complete defaults list if the hypervisor wasn't
3703
        # enabled before
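        # (objects.FillDict(defaults, custom) returns a copy of the defaults
        # updated with the custom values, so explicitly configured settings
        # always take precedence over HVC_DEFAULTS.)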
3704
        if hv not in new_hvp:
3705
          new_hvp[hv] = {}
3706
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3707
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3708
    else:
3709
      self.hv_list = cluster.enabled_hypervisors
3710

    
3711
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3712
      # either the enabled list has changed, or the parameters have, validate
3713
      for hv_name, hv_params in self.new_hvparams.items():
3714
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3715
            (self.op.enabled_hypervisors and
3716
             hv_name in self.op.enabled_hypervisors)):
3717
          # either this is a new hypervisor, or its parameters have changed
3718
          hv_class = hypervisor.GetHypervisor(hv_name)
3719
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3720
          hv_class.CheckParameterSyntax(hv_params)
3721
          _CheckHVParams(self, node_list, hv_name, hv_params)
3722

    
3723
    if self.op.os_hvp:
3724
      # no need to check any newly-enabled hypervisors, since the
3725
      # defaults have already been checked in the above code-block
3726
      for os_name, os_hvp in self.new_os_hvp.items():
3727
        for hv_name, hv_params in os_hvp.items():
3728
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3729
          # we need to fill in the new os_hvp on top of the actual hv_p
3730
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3731
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3732
          hv_class = hypervisor.GetHypervisor(hv_name)
3733
          hv_class.CheckParameterSyntax(new_osp)
3734
          _CheckHVParams(self, node_list, hv_name, new_osp)
3735

    
3736
    if self.op.default_iallocator:
3737
      alloc_script = utils.FindFile(self.op.default_iallocator,
3738
                                    constants.IALLOCATOR_SEARCH_PATH,
3739
                                    os.path.isfile)
3740
      if alloc_script is None:
3741
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3742
                                   " specified" % self.op.default_iallocator,
3743
                                   errors.ECODE_INVAL)
3744

    
3745
  def Exec(self, feedback_fn):
3746
    """Change the parameters of the cluster.
3747

3748
    """
3749
    if self.op.vg_name is not None:
3750
      new_volume = self.op.vg_name
3751
      if not new_volume:
3752
        new_volume = None
3753
      if new_volume != self.cfg.GetVGName():
3754
        self.cfg.SetVGName(new_volume)
3755
      else:
3756
        feedback_fn("Cluster LVM configuration already in desired"
3757
                    " state, not changing")
3758
    if self.op.drbd_helper is not None:
3759
      new_helper = self.op.drbd_helper
3760
      if not new_helper:
3761
        new_helper = None
3762
      if new_helper != self.cfg.GetDRBDHelper():
3763
        self.cfg.SetDRBDHelper(new_helper)
3764
      else:
3765
        feedback_fn("Cluster DRBD helper already in desired state,"
3766
                    " not changing")
3767
    if self.op.hvparams:
3768
      self.cluster.hvparams = self.new_hvparams
3769
    if self.op.os_hvp:
3770
      self.cluster.os_hvp = self.new_os_hvp
3771
    if self.op.enabled_hypervisors is not None:
3772
      self.cluster.hvparams = self.new_hvparams
3773
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3774
    if self.op.beparams:
3775
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3776
    if self.op.nicparams:
3777
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3778
    if self.op.osparams:
3779
      self.cluster.osparams = self.new_osp
3780
    if self.op.ndparams:
3781
      self.cluster.ndparams = self.new_ndparams
3782
    if self.op.diskparams:
3783
      self.cluster.diskparams = self.new_diskparams
3784

    
3785
    if self.op.candidate_pool_size is not None:
3786
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3787
      # we need to update the pool size here, otherwise the save will fail
3788
      _AdjustCandidatePool(self, [])
3789

    
3790
    if self.op.maintain_node_health is not None:
3791
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3792
        feedback_fn("Note: CONFD was disabled at build time, node health"
3793
                    " maintenance is not useful (still enabling it)")
3794
      self.cluster.maintain_node_health = self.op.maintain_node_health
3795

    
3796
    if self.op.prealloc_wipe_disks is not None:
3797
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3798

    
3799
    if self.op.add_uids is not None:
3800
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3801

    
3802
    if self.op.remove_uids is not None:
3803
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3804

    
3805
    if self.op.uid_pool is not None:
3806
      self.cluster.uid_pool = self.op.uid_pool
3807

    
3808
    if self.op.default_iallocator is not None:
3809
      self.cluster.default_iallocator = self.op.default_iallocator
3810

    
3811
    if self.op.reserved_lvs is not None:
3812
      self.cluster.reserved_lvs = self.op.reserved_lvs
3813

    
3814
    if self.op.use_external_mip_script is not None:
3815
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3816

    
3817
    def helper_os(aname, mods, desc):
3818
      desc += " OS list"
3819
      lst = getattr(self.cluster, aname)
3820
      for key, val in mods:
3821
        if key == constants.DDM_ADD:
3822
          if val in lst:
3823
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3824
          else:
3825
            lst.append(val)
3826
        elif key == constants.DDM_REMOVE:
3827
          if val in lst:
3828
            lst.remove(val)
3829
          else:
3830
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3831
        else:
3832
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3833

    
3834
    if self.op.hidden_os:
3835
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3836

    
3837
    if self.op.blacklisted_os:
3838
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3839

    
3840
    if self.op.master_netdev:
3841
      master_params = self.cfg.GetMasterNetworkParameters()
3842
      ems = self.cfg.GetUseExternalMipScript()
3843
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3844
                  self.cluster.master_netdev)
3845
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3846
                                                       master_params, ems)
3847
      result.Raise("Could not disable the master ip")
3848
      feedback_fn("Changing master_netdev from %s to %s" %
3849
                  (master_params.netdev, self.op.master_netdev))
3850
      self.cluster.master_netdev = self.op.master_netdev
3851

    
3852
    if self.op.master_netmask:
3853
      master_params = self.cfg.GetMasterNetworkParameters()
3854
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3855
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3856
                                                        master_params.netmask,
3857
                                                        self.op.master_netmask,
3858
                                                        master_params.ip,
3859
                                                        master_params.netdev)
3860
      if result.fail_msg:
3861
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3862
        feedback_fn(msg)
3863

    
3864
      self.cluster.master_netmask = self.op.master_netmask
3865

    
3866
    self.cfg.Update(self.cluster, feedback_fn)
3867

    
3868
    if self.op.master_netdev:
3869
      master_params = self.cfg.GetMasterNetworkParameters()
3870
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3871
                  self.op.master_netdev)
3872
      ems = self.cfg.GetUseExternalMipScript()
3873
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3874
                                                     master_params, ems)
3875
      if result.fail_msg:
3876
        self.LogWarning("Could not re-enable the master ip on"
3877
                        " the master, please restart manually: %s",
3878
                        result.fail_msg)
3879

    
3880

    
3881
def _UploadHelper(lu, nodes, fname):
3882
  """Helper for uploading a file and showing warnings.
3883

3884
  """
3885
  if os.path.exists(fname):
3886
    result = lu.rpc.call_upload_file(nodes, fname)
3887
    for to_node, to_result in result.items():
3888
      msg = to_result.fail_msg
3889
      if msg:
3890
        msg = ("Copy of file %s to node %s failed: %s" %
3891
               (fname, to_node, msg))
3892
        lu.proc.LogWarning(msg)
3893

    
3894

    
3895
def _ComputeAncillaryFiles(cluster, redist):
3896
  """Compute files external to Ganeti which need to be consistent.
3897

3898
  @type redist: boolean
3899
  @param redist: Whether to include files which need to be redistributed
3900

3901
  """
3902
  # Compute files for all nodes
3903
  files_all = set([
3904
    constants.SSH_KNOWN_HOSTS_FILE,
3905
    constants.CONFD_HMAC_KEY,
3906
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3907
    constants.SPICE_CERT_FILE,
3908
    constants.SPICE_CACERT_FILE,
3909
    constants.RAPI_USERS_FILE,
3910
    ])
3911

    
3912
  if not redist:
3913
    files_all.update(constants.ALL_CERT_FILES)
3914
    files_all.update(ssconf.SimpleStore().GetFileList())
3915
  else:
3916
    # we need to ship at least the RAPI certificate
3917
    files_all.add(constants.RAPI_CERT_FILE)
3918

    
3919
  if cluster.modify_etc_hosts:
3920
    files_all.add(constants.ETC_HOSTS)
3921

    
3922
  # Files which are optional, these must:
3923
  # - be present in one other category as well
3924
  # - either exist or not exist on all nodes of that category (mc, vm all)
3925
  files_opt = set([
3926
    constants.RAPI_USERS_FILE,
3927
    ])
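  # (constants.RAPI_USERS_FILE is the example here: it is also listed in
  # files_all above, so it must either exist on every node or on none.)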
3928

    
3929
  # Files which should only be on master candidates
3930
  files_mc = set()
3931

    
3932
  if not redist:
3933
    files_mc.add(constants.CLUSTER_CONF_FILE)
3934

    
3935
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3936
    # replication
3937
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3938

    
3939
  # Files which should only be on VM-capable nodes
3940
  files_vm = set(filename
3941
    for hv_name in cluster.enabled_hypervisors
3942
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3943

    
3944
  files_opt |= set(filename
3945
    for hv_name in cluster.enabled_hypervisors
3946
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3947

    
3948
  # Filenames in each category must be unique
3949
  all_files_set = files_all | files_mc | files_vm
3950
  assert (len(all_files_set) ==
3951
          sum(map(len, [files_all, files_mc, files_vm]))), \
3952
         "Found file listed in more than one file list"
3953

    
3954
  # Optional files must be present in one other category
3955
  assert all_files_set.issuperset(files_opt), \
3956
         "Optional file not in a different required list"
3957

    
3958
  return (files_all, files_opt, files_mc, files_vm)
3959

    
3960

    
3961
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3962
  """Distribute additional files which are part of the cluster configuration.
3963

3964
  ConfigWriter takes care of distributing the config and ssconf files, but
3965
  there are more files which should be distributed to all nodes. This function
3966
  makes sure those are copied.
3967

3968
  @param lu: calling logical unit
3969
  @param additional_nodes: list of nodes not in the config to distribute to
3970
  @type additional_vm: boolean
3971
  @param additional_vm: whether the additional nodes are vm-capable or not
3972

3973
  """
3974
  # Gather target nodes
3975
  cluster = lu.cfg.GetClusterInfo()
3976
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3977

    
3978
  online_nodes = lu.cfg.GetOnlineNodeList()
3979
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3980

    
3981
  if additional_nodes is not None:
3982
    online_nodes.extend(additional_nodes)
3983
    if additional_vm:
3984
      vm_nodes.extend(additional_nodes)
3985

    
3986
  # Never distribute to master node
3987
  for nodelist in [online_nodes, vm_nodes]:
3988
    if master_info.name in nodelist:
3989
      nodelist.remove(master_info.name)
3990

    
3991
  # Gather file lists
3992
  (files_all, _, files_mc, files_vm) = \
3993
    _ComputeAncillaryFiles(cluster, True)
3994

    
3995
  # Never re-distribute configuration file from here
3996
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3997
              constants.CLUSTER_CONF_FILE in files_vm)
3998
  assert not files_mc, "Master candidates not handled in this function"
3999

    
4000
  filemap = [
4001
    (online_nodes, files_all),
4002
    (vm_nodes, files_vm),
4003
    ]
4004

    
4005
  # Upload the files
4006
  for (node_list, files) in filemap:
4007
    for fname in files:
4008
      _UploadHelper(lu, node_list, fname)
4009

    
4010

    
4011
class LUClusterRedistConf(NoHooksLU):
4012
  """Force the redistribution of cluster configuration.
4013

4014
  This is a very simple LU.
4015

4016
  """
4017
  REQ_BGL = False
4018

    
4019
  def ExpandNames(self):
4020
    self.needed_locks = {
4021
      locking.LEVEL_NODE: locking.ALL_SET,
4022
    }
4023
    self.share_locks[locking.LEVEL_NODE] = 1
4024

    
4025
  def Exec(self, feedback_fn):
4026
    """Redistribute the configuration.
4027

4028
    """
4029
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
4030
    _RedistributeAncillaryFiles(self)
4031

    
4032

    
4033
class LUClusterActivateMasterIp(NoHooksLU):
4034
  """Activate the master IP on the master node.
4035

4036
  """
4037
  def Exec(self, feedback_fn):
4038
    """Activate the master IP.
4039

4040
    """
4041
    master_params = self.cfg.GetMasterNetworkParameters()
4042
    ems = self.cfg.GetUseExternalMipScript()
4043
    result = self.rpc.call_node_activate_master_ip(master_params.name,
4044
                                                   master_params, ems)
4045
    result.Raise("Could not activate the master IP")
4046

    
4047

    
4048
class LUClusterDeactivateMasterIp(NoHooksLU):
4049
  """Deactivate the master IP on the master node.
4050

4051
  """
4052
  def Exec(self, feedback_fn):
4053
    """Deactivate the master IP.
4054

4055
    """
4056
    master_params = self.cfg.GetMasterNetworkParameters()
4057
    ems = self.cfg.GetUseExternalMipScript()
4058
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4059
                                                     master_params, ems)
4060
    result.Raise("Could not deactivate the master IP")
4061

    
4062

    
4063
def _WaitForSync(lu, instance, disks=None, oneshot=False):
4064
  """Sleep and poll for an instance's disk to sync.
4065

4066
  """
4067
  if not instance.disks or (disks is not None and not disks):
4068
    return True
4069

    
4070
  disks = _ExpandCheckDisks(instance, disks)
4071

    
4072
  if not oneshot:
4073
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4074

    
4075
  node = instance.primary_node
4076

    
4077
  for dev in disks:
4078
    lu.cfg.SetDiskID(dev, node)
4079

    
4080
  # TODO: Convert to utils.Retry
4081

    
4082
  retries = 0
4083
  degr_retries = 10 # in seconds, as we sleep 1 second each time
4084
  while True:
4085
    max_time = 0
4086
    done = True
4087
    cumul_degraded = False
4088
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4089
    msg = rstats.fail_msg
4090
    if msg:
4091
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4092
      retries += 1
4093
      if retries >= 10:
4094
        raise errors.RemoteError("Can't contact node %s for mirror data,"
4095
                                 " aborting." % node)
4096
      time.sleep(6)
4097
      continue
4098
    rstats = rstats.payload
4099
    retries = 0
4100
    for i, mstat in enumerate(rstats):
4101
      if mstat is None:
4102
        lu.LogWarning("Can't compute data for node %s/%s",
4103
                      node, disks[i].iv_name)
4104
        continue
4105

    
4106
      cumul_degraded = (cumul_degraded or
4107
                        (mstat.is_degraded and mstat.sync_percent is None))
4108
      if mstat.sync_percent is not None:
4109
        done = False
4110
        if mstat.estimated_time is not None:
4111
          rem_time = ("%s remaining (estimated)" %
4112
                      utils.FormatSeconds(mstat.estimated_time))
4113
          max_time = mstat.estimated_time
4114
        else:
4115
          rem_time = "no time estimate"
4116
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4117
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4118

    
4119
    # if we're done but degraded, let's do a few small retries, to
4120
    # make sure we see a stable and not transient situation; therefore
4121
    # we force restart of the loop
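    # (With degr_retries starting at 10 and a one-second sleep per retry this
    # adds at most about ten extra seconds before the degraded state is
    # accepted as final.)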
4122
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4123
      logging.info("Degraded disks found, %d retries left", degr_retries)
4124
      degr_retries -= 1
4125
      time.sleep(1)
4126
      continue
4127

    
4128
    if done or oneshot:
4129
      break
4130

    
4131
    time.sleep(min(60, max_time))
4132

    
4133
  if done:
4134
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4135
  return not cumul_degraded
4136

    
4137

    
4138
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4139
  """Check that mirrors are not degraded.
4140

4141
  The ldisk parameter, if True, will change the test from the
4142
  is_degraded attribute (which represents overall non-ok status for
4143
  the device(s)) to the ldisk (representing the local storage status).
4144

4145
  """
4146
  lu.cfg.SetDiskID(dev, node)
4147

    
4148
  result = True
4149

    
4150
  if on_primary or dev.AssembleOnSecondary():
4151
    rstats = lu.rpc.call_blockdev_find(node, dev)
4152
    msg = rstats.fail_msg
4153
    if msg:
4154
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4155
      result = False
4156
    elif not rstats.payload:
4157
      lu.LogWarning("Can't find disk on node %s", node)
4158
      result = False
4159
    else:
4160
      if ldisk:
4161
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4162
      else:
4163
        result = result and not rstats.payload.is_degraded
4164

    
4165
  if dev.children:
4166
    for child in dev.children:
4167
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4168

    
4169
  return result
4170

    
4171

    
4172
class LUOobCommand(NoHooksLU):
4173
  """Logical unit for OOB handling.
4174

4175
  """
4176
  REQ_BGL = False
4177
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


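# Illustrative sketch (not part of the original module): the per-command payload
# shapes enforced by LUOobCommand._CheckPayload, as a table-driven type check.
# Only the payload type is checked here; the real validation also inspects the
# individual health statuses.
def _ExampleOobPayloadShapeOk(command, payload):
  """Toy check of the expected payload type for a few OOB commands."""
  expected = {
    constants.OOB_HEALTH: list,        # list of (item, status) pairs
    constants.OOB_POWER_STATUS: dict,  # keyed by OOB_POWER_STATUS_POWERED
    constants.OOB_POWER_ON: type(None),
    constants.OOB_POWER_OFF: type(None),
    constants.OOB_POWER_CYCLE: type(None),
    }
  return isinstance(payload, expected.get(command, object))

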
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into an a per-os per-node dictionary
4377

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.
4416

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


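# Illustrative sketch (not part of the original module): the inversion performed
# by _OsQuery._DiagnoseByOS, on plain dictionaries. "rlist" here is a
# hypothetical {node: [os_name, ...]} mapping rather than the real RPC results.
def _ExampleInvertNodeOsMap(rlist):
  """Toy remap of {node: [os_name, ...]} into {os_name: set(nodes)}."""
  all_os = {}
  for node_name, os_names in rlist.items():
    for name in os_names:
      all_os.setdefault(name, set()).add(node_name)
  return all_os


# For example, {"node1": ["debian", "lenny"], "node2": ["debian"]} becomes
# {"debian": set(["node1", "node2"]), "lenny": set(["node1"])}.

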
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


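# Illustrative sketch (not part of the original module): the status filter that
# LUOsDiagnose._BuildFilter produces when none of "hidden", "blacklisted" or
# "valid" are among the requested fields, i.e. the legacy "only visible, valid
# OSes" behaviour described above.
_EXAMPLE_OS_STATUS_FILTER = [
  qlang.OP_AND,
  [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  [qlang.OP_TRUE, "valid"],
  ]

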
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


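# Illustrative sketch (not part of the original module): the instance-to-node
# inversion done in _NodeQuery._GetQueryData, over plain tuples. Each entry of
# "instances" is a hypothetical (name, primary_node, secondary_nodes) triple.
def _ExampleMapNodesToInstances(node_names, instances):
  """Toy build of per-node sets of primary and secondary instances."""
  node_to_primary = dict((name, set()) for name in node_names)
  node_to_secondary = dict((name, set()) for name in node_names)
  for (inst_name, primary, secondaries) in instances:
    if primary in node_to_primary:
      node_to_primary[primary].add(inst_name)
    for secnode in secondaries:
      if secnode in node_to_secondary:
        node_to_secondary[secnode].add(inst_name)
  return (node_to_primary, node_to_secondary)

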
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


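# Illustrative sketch (not part of the original module): the field-list
# preparation done in LUNodeQueryStorage.Exec, as a standalone helper. The
# backend is always asked for the name field (needed for sorting) and never for
# the node/type pseudo-fields, which only the LU itself can fill in.
def _ExamplePrepareStorageFields(output_fields):
  """Toy computation of the field list to request from the backend."""
  if constants.SF_NAME in output_fields:
    fields = output_fields[:]
  else:
    fields = [constants.SF_NAME] + output_fields
  return [f for f in fields if f not in (constants.SF_NODE, constants.SF_TYPE)]

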
class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


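# Illustrative sketch (not part of the original module): the "running on the
# wrong node" classification used in _InstanceQuery._GetQueryData, over plain
# data. "reported" maps node name -> instance names that node claims to run;
# "primaries" maps instance name -> its configured primary node.
def _ExampleFindWrongNodeInstances(reported, primaries):
  """Toy detection of instances running somewhere other than their primary."""
  wrongnode = set()
  for (node_name, inst_names) in reported.items():
    for inst in inst_names:
      if primaries.get(inst) not in (None, node_name):
        wrongnode.add(inst)
  return wrongnode

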
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.
5014

5015
  """
5016
  # pylint: disable=W0142
5017
  REQ_BGL = False
5018

    
5019
  def CheckArguments(self):
5020
    self.qcls = _GetQueryImplementation(self.op.what)
5021

    
5022
  def ExpandNames(self):
5023
    self.needed_locks = {}
5024

    
5025
  def Exec(self, feedback_fn):
5026
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
5027

    
5028

    
5029
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.
5061

5062
    """
5063
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5064
    result = self.rpc.call_storage_modify(self.op.node_name,
5065
                                          self.op.storage_type, st_args,
5066
                                          self.op.name, self.op.changes)
5067
    result.Raise("Failed to modify storage unit '%s' on %s" %
5068
                 (self.op.name, self.op.node_name))
5069

    
5070

    
5071
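# Illustrative sketch (not part of the original module): the "only whitelisted
# fields may be changed" validation from LUNodeModifyStorage.CheckArguments,
# over plain sets. "modifiable" plays the role of
# constants.MODIFIABLE_STORAGE_FIELDS[storage_type].
def _ExampleInvalidStorageChanges(changes, modifiable):
  """Toy computation of the requested change keys that are not allowed."""
  return set(changes.keys()) - frozenset(modifiable)

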
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5137
                                   " IPv4 address must be given as secondary",
5138
                                   errors.ECODE_INVAL)
5139
      self.op.secondary_ip = primary_ip
5140

    
5141
    secondary_ip = self.op.secondary_ip
5142
    if not netutils.IP4Address.IsValid(secondary_ip):
5143
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5144
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5145

    
5146
    node_list = cfg.GetNodeList()
5147
    if not self.op.readd and node in node_list:
5148
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5149
                                 node, errors.ECODE_EXISTS)
5150
    elif self.op.readd and node not in node_list:
5151
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5152
                                 errors.ECODE_NOENT)
5153

    
5154
    self.changed_primary_ip = False
5155

    
5156
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5157
      if self.op.readd and node == existing_node_name:
5158
        if existing_node.secondary_ip != secondary_ip:
5159
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5160
                                     " address configuration as before",
5161
                                     errors.ECODE_INVAL)
5162
        if existing_node.primary_ip != primary_ip:
5163
          self.changed_primary_ip = True
5164

    
5165
        continue
5166

    
5167
      if (existing_node.primary_ip == primary_ip or
5168
          existing_node.secondary_ip == primary_ip or
5169
          existing_node.primary_ip == secondary_ip or
5170
          existing_node.secondary_ip == secondary_ip):
5171
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5172
                                   " existing node %s" % existing_node.name,
5173
                                   errors.ECODE_NOTUNIQUE)
5174

    
5175
    # After this 'if' block, None is no longer a valid value for the
5176
    # _capable op attributes
5177
    if self.op.readd:
5178
      old_node = self.cfg.GetNodeInfo(node)
5179
      assert old_node is not None, "Can't retrieve locked node %s" % node
5180
      for attr in self._NFLAGS:
5181
        if getattr(self.op, attr) is None:
5182
          setattr(self.op, attr, getattr(old_node, attr))
5183
    else:
5184
      for attr in self._NFLAGS:
5185
        if getattr(self.op, attr) is None:
5186
          setattr(self.op, attr, True)
5187

    
5188
    if self.op.readd and not self.op.vm_capable:
5189
      pri, sec = cfg.GetNodeInstances(node)
5190
      if pri or sec:
5191
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5192
                                   " flag set to false, but it already holds"
5193
                                   " instances" % node,
5194
                                   errors.ECODE_STATE)
5195

    
5196
    # check that the type of the node (single versus dual homed) is the
5197
    # same as for the master
5198
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5199
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5200
    newbie_singlehomed = secondary_ip == primary_ip
5201
    if master_singlehomed != newbie_singlehomed:
5202
      if master_singlehomed:
5203
        raise errors.OpPrereqError("The master has no secondary ip but the"
5204
                                   " new node has one",
5205
                                   errors.ECODE_INVAL)
5206
      else:
5207
        raise errors.OpPrereqError("The master has a secondary ip but the"
5208
                                   " new node doesn't have one",
5209
                                   errors.ECODE_INVAL)
5210

    
5211
    # checks reachability
5212
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5213
      raise errors.OpPrereqError("Node not reachable by ping",
5214
                                 errors.ECODE_ENVIRON)
5215

    
5216
    if not newbie_singlehomed:
5217
      # check reachability from my secondary ip to newbie's secondary ip
5218
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5219
                           source=myself.secondary_ip):
5220
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5221
                                   " based ping to node daemon port",
5222
                                   errors.ECODE_ENVIRON)
5223

    
5224
    if self.op.readd:
5225
      exceptions = [node]
5226
    else:
5227
      exceptions = []
5228

    
5229
    if self.op.master_capable:
5230
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5231
    else:
5232
      self.master_candidate = False
5233

    
5234
    if self.op.readd:
5235
      self.new_node = old_node
5236
    else:
5237
      node_group = cfg.LookupNodeGroup(self.op.group)
5238
      self.new_node = objects.Node(name=node,
5239
                                   primary_ip=primary_ip,
5240
                                   secondary_ip=secondary_ip,
5241
                                   master_candidate=self.master_candidate,
5242
                                   offline=False, drained=False,
5243
                                   group=node_group)
5244

    
5245
    if self.op.ndparams:
5246
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5247

    
5248
  def Exec(self, feedback_fn):
5249
    """Adds the new node to the cluster.
5250

5251
    """
5252
    new_node = self.new_node
5253
    node = new_node.name
5254

    
5255
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5256
      "Not owning BGL"
5257

    
5258
    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


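# Illustrative sketch (not part of the original module): the single- vs
# dual-homed compatibility rule enforced in LUNodeAdd.CheckPrereq, reduced to a
# pure predicate over the two address pairs.
def _ExampleHomingMatches(master_primary, master_secondary,
                          node_primary, node_secondary):
  """Toy check that a new node has the same homing layout as the master."""
  master_singlehomed = master_secondary == master_primary
  newbie_singlehomed = node_secondary == node_primary
  return master_singlehomed == newbie_singlehomed

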
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)
5641

    
5642
      new_flags = self._R2F[new_role]
5643
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5644
        if of != nf:
5645
          result.append((desc, str(nf)))
5646
      (node.master_candidate, node.drained, node.offline) = new_flags
5647

    
5648
      # we locked all nodes, we adjust the CP before updating this node
5649
      if self.lock_all:
5650
        _AdjustCandidatePool(self, [node.name])
5651

    
5652
    if self.op.secondary_ip:
5653
      node.secondary_ip = self.op.secondary_ip
5654
      result.append(("secondary_ip", self.op.secondary_ip))
5655

    
5656
    # this will trigger configuration file update, if needed
5657
    self.cfg.Update(node, feedback_fn)
5658

    
5659
    # this will trigger job queue propagation or cleanup if the mc
5660
    # flag changed
5661
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5662
      self.context.ReaddNode(node)
5663

    
5664
    return result
5665

    
5666

    
5667
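# Editor's illustrative sketch (not part of upstream Ganeti): the code above
# maps the three mutually exclusive node flags (master_candidate, drained,
# offline) to a single role via the class-level _F2R table and back via _R2F.
# The helper below models that mapping with plain data structures; the name
# and role strings are hypothetical and nothing in the surrounding code calls
# it.
def _ExampleNodeRoleMapping(master_candidate, drained, offline):
  """Return a symbolic role for a (master_candidate, drained, offline) tuple.

  """
  example_f2r = {
    (True, False, False): "candidate",
    (False, True, False): "drained",
    (False, False, True): "offline",
    (False, False, False): "regular",
    }
  # at most one flag may be set, mirroring the assert in CheckPrereq above
  assert [master_candidate, drained, offline].count(True) <= 1
  return example_f2r[(master_candidate, drained, offline)]

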
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.primary_hypervisor,
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


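# Editor's illustrative sketch (not part of upstream Ganeti): the Exec method
# above resolves each requested field with an if/elif chain.  The same
# dispatch can be modelled with a table of callables; the function below is a
# hypothetical, self-contained rendering of that idea and is not used by the
# surrounding code.  Example:
#   _ExampleFieldDispatch(["a", "b"], {"a": lambda: 1, "b": lambda: 2})
# returns [1, 2].
def _ExampleFieldDispatch(output_fields, getters):
  """Resolve output_fields using a {field_name: callable} table.

  Unknown fields raise KeyError, playing the role of the ParameterError above.

  """
  return [getters[field]() for field in output_fields]

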
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


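# Editor's illustrative sketch (not part of upstream Ganeti): the function
# above uses a two-pass scheme -- first assemble every disk on all nodes in
# secondary mode, then re-assemble only on the primary node in primary mode --
# to narrow the window in which DRBD could be promoted before the peers have
# connected.  The skeleton below shows just that control flow over a
# hypothetical assemble_fn(node, disk, as_primary) callback; it is not called
# by the surrounding code.
def _ExampleTwoPassAssemble(disks_per_node, primary_node, assemble_fn):
  """Run assemble_fn twice: secondary mode everywhere, then primary mode.

  @param disks_per_node: dict mapping node name to a list of disk objects

  """
  # pass 1: secondary mode on every node, including the future primary
  for node, disks in disks_per_node.items():
    for disk in disks:
      assemble_fn(node, disk, False)
  # pass 2: primary mode, only on the primary node
  for disk in disks_per_node.get(primary_node, []):
    assemble_fn(primary_node, disk, True)

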
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


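# Editor's illustrative sketch (not part of upstream Ganeti): _ExpandCheckDisks
# above implements a common "default to everything, otherwise validate the
# subset" idiom.  A generic, self-contained version of the same idiom follows;
# the name is hypothetical and nothing in the surrounding code uses it.
def _ExampleSelectSubset(selection, universe):
  """Return universe if selection is None, else the validated selection.

  Raises ValueError if selection contains items outside universe.

  """
  if selection is None:
    return universe
  if not set(selection).issubset(universe):
    raise ValueError("Selection contains items outside the known set")
  return selection

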
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is false, errors on the primary node are not
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  (_, _, (hv_info, )) = nodeinfo[node].payload

  free_mem = hv_info.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


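# Editor's illustrative sketch (not part of upstream Ganeti): the check above
# (and the disk and CPU checks that follow) share a fixed pattern -- fetch a
# value from an RPC payload, reject it if it is not an integer (the node could
# not report it), then compare it with the requested amount.  A generic,
# hypothetical rendering of that pattern, not used by the surrounding code:
def _ExampleCheckFreeResource(name, requested, reported):
  """Raise ValueError unless reported is an int large enough for requested.

  """
  if not isinstance(reported, int):
    raise ValueError("Cannot determine free %s, got %r" % (name, reported))
  if requested > reported:
    raise ValueError("Not enough %s: needed %s, available %s" %
                     (name, requested, reported))

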
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6082
  """Checks if nodes have enough free disk space in the all VGs.
6083

6084
  This function check if all given nodes have the needed amount of
6085
  free disk. In case any node has less disk or we cannot get the
6086
  information from the node, this function raise an OpPrereqError
6087
  exception.
6088

6089
  @type lu: C{LogicalUnit}
6090
  @param lu: a logical unit from which we get configuration data
6091
  @type nodenames: C{list}
6092
  @param nodenames: the list of node names to check
6093
  @type req_sizes: C{dict}
6094
  @param req_sizes: the hash of vg and corresponding amount of disk in
6095
      MiB to check for
6096
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6097
      or we cannot check the node
6098

6099
  """
6100
  for vg, req_size in req_sizes.items():
6101
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6102

    
6103

    
6104
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6105
  """Checks if nodes have enough free disk space in the specified VG.
6106

6107
  This function check if all given nodes have the needed amount of
6108
  free disk. In case any node has less disk or we cannot get the
6109
  information from the node, this function raise an OpPrereqError
6110
  exception.
6111

6112
  @type lu: C{LogicalUnit}
6113
  @param lu: a logical unit from which we get configuration data
6114
  @type nodenames: C{list}
6115
  @param nodenames: the list of node names to check
6116
  @type vg: C{str}
6117
  @param vg: the volume group to check
6118
  @type requested: C{int}
6119
  @param requested: the amount of disk in MiB to check for
6120
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6121
      or we cannot check the node
6122

6123
  """
6124
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6125
  for node in nodenames:
6126
    info = nodeinfo[node]
6127
    info.Raise("Cannot get current information from node %s" % node,
6128
               prereq=True, ecode=errors.ECODE_ENVIRON)
6129
    (_, (vg_info, ), _) = info.payload
6130
    vg_free = vg_info.get("vg_free", None)
6131
    if not isinstance(vg_free, int):
6132
      raise errors.OpPrereqError("Can't compute free disk space on node"
6133
                                 " %s for vg %s, result was '%s'" %
6134
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6135
    if requested > vg_free:
6136
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6137
                                 " vg %s: required %d MiB, available %d MiB" %
6138
                                 (node, vg, requested, vg_free),
6139
                                 errors.ECODE_NORES)
6140

    
6141

    
6142
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6143
  """Checks if nodes have enough physical CPUs
6144

6145
  This function checks if all given nodes have the needed number of
6146
  physical CPUs. In case any node has less CPUs or we cannot get the
6147
  information from the node, this function raises an OpPrereqError
6148
  exception.
6149

6150
  @type lu: C{LogicalUnit}
6151
  @param lu: a logical unit from which we get configuration data
6152
  @type nodenames: C{list}
6153
  @param nodenames: the list of node names to check
6154
  @type requested: C{int}
6155
  @param requested: the minimum acceptable number of physical CPUs
6156
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6157
      or we cannot check the node
6158

6159
  """
6160
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6161
  for node in nodenames:
6162
    info = nodeinfo[node]
6163
    info.Raise("Cannot get current information from node %s" % node,
6164
               prereq=True, ecode=errors.ECODE_ENVIRON)
6165
    (_, _, (hv_info, )) = info.payload
6166
    num_cpus = hv_info.get("cpu_total", None)
6167
    if not isinstance(num_cpus, int):
6168
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6169
                                 " on node %s, result was '%s'" %
6170
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6171
    if requested > num_cpus:
6172
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6173
                                 "required" % (node, num_cpus, requested),
6174
                                 errors.ECODE_NORES)
6175

    
6176

    
6177
class LUInstanceStartup(LogicalUnit):
6178
  """Starts an instance.
6179

6180
  """
6181
  HPATH = "instance-start"
6182
  HTYPE = constants.HTYPE_INSTANCE
6183
  REQ_BGL = False
6184

    
6185
  def CheckArguments(self):
6186
    # extra beparams
6187
    if self.op.beparams:
6188
      # fill the beparams dict
6189
      objects.UpgradeBeParams(self.op.beparams)
6190
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6191

    
6192
  def ExpandNames(self):
6193
    self._ExpandAndLockInstance()
6194

    
6195
  def BuildHooksEnv(self):
6196
    """Build hooks env.
6197

6198
    This runs on master, primary and secondary nodes of the instance.
6199

6200
    """
6201
    env = {
6202
      "FORCE": self.op.force,
6203
      }
6204

    
6205
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6206

    
6207
    return env
6208

    
6209
  def BuildHooksNodes(self):
6210
    """Build hooks nodes.
6211

6212
    """
6213
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6214
    return (nl, nl)
6215

    
6216
  def CheckPrereq(self):
6217
    """Check prerequisites.
6218

6219
    This checks that the instance is in the cluster.
6220

6221
    """
6222
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6223
    assert self.instance is not None, \
6224
      "Cannot retrieve locked instance %s" % self.op.instance_name
6225

    
6226
    # extra hvparams
6227
    if self.op.hvparams:
6228
      # check hypervisor parameter syntax (locally)
6229
      cluster = self.cfg.GetClusterInfo()
6230
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6231
      filled_hvp = cluster.FillHV(instance)
6232
      filled_hvp.update(self.op.hvparams)
6233
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6234
      hv_type.CheckParameterSyntax(filled_hvp)
6235
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6236

    
6237
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6238

    
6239
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6240

    
6241
    if self.primary_offline and self.op.ignore_offline_nodes:
6242
      self.proc.LogWarning("Ignoring offline primary node")
6243

    
6244
      if self.op.hvparams or self.op.beparams:
6245
        self.proc.LogWarning("Overridden parameters are ignored")
6246
    else:
6247
      _CheckNodeOnline(self, instance.primary_node)
6248

    
6249
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6250

    
6251
      # check bridges existence
6252
      _CheckInstanceBridgesExist(self, instance)
6253

    
6254
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6255
                                                instance.name,
6256
                                                instance.hypervisor)
6257
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6258
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6259
      if not remote_info.payload: # not running already
6260
        _CheckNodeFreeMemory(self, instance.primary_node,
6261
                             "starting instance %s" % instance.name,
6262
                             bep[constants.BE_MAXMEM], instance.hypervisor)
6263

    
6264
  def Exec(self, feedback_fn):
6265
    """Start the instance.
6266

6267
    """
6268
    instance = self.instance
6269
    force = self.op.force
6270

    
6271
    if not self.op.no_remember:
6272
      self.cfg.MarkInstanceUp(instance.name)
6273

    
6274
    if self.primary_offline:
6275
      assert self.op.ignore_offline_nodes
6276
      self.proc.LogInfo("Primary node offline, marked instance as started")
6277
    else:
6278
      node_current = instance.primary_node
6279

    
6280
      _StartInstanceDisks(self, instance, force)
6281

    
6282
      result = \
6283
        self.rpc.call_instance_start(node_current,
6284
                                     (instance, self.op.hvparams,
6285
                                      self.op.beparams),
6286
                                     self.op.startup_paused)
6287
      msg = result.fail_msg
6288
      if msg:
6289
        _ShutdownInstanceDisks(self, instance)
6290
        raise errors.OpExecError("Could not start instance: %s" % msg)
6291

    
6292

    
6293
class LUInstanceReboot(LogicalUnit):
6294
  """Reboot an instance.
6295

6296
  """
6297
  HPATH = "instance-reboot"
6298
  HTYPE = constants.HTYPE_INSTANCE
6299
  REQ_BGL = False
6300

    
6301
  def ExpandNames(self):
6302
    self._ExpandAndLockInstance()
6303

    
6304
  def BuildHooksEnv(self):
6305
    """Build hooks env.
6306

6307
    This runs on master, primary and secondary nodes of the instance.
6308

6309
    """
6310
    env = {
6311
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6312
      "REBOOT_TYPE": self.op.reboot_type,
6313
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6314
      }
6315

    
6316
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6317

    
6318
    return env
6319

    
6320
  def BuildHooksNodes(self):
6321
    """Build hooks nodes.
6322

6323
    """
6324
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6325
    return (nl, nl)
6326

    
6327
  def CheckPrereq(self):
6328
    """Check prerequisites.
6329

6330
    This checks that the instance is in the cluster.
6331

6332
    """
6333
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6334
    assert self.instance is not None, \
6335
      "Cannot retrieve locked instance %s" % self.op.instance_name
6336
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6337
    _CheckNodeOnline(self, instance.primary_node)
6338

    
6339
    # check bridges existence
6340
    _CheckInstanceBridgesExist(self, instance)
6341

    
6342
  def Exec(self, feedback_fn):
6343
    """Reboot the instance.
6344

6345
    """
6346
    instance = self.instance
6347
    ignore_secondaries = self.op.ignore_secondaries
6348
    reboot_type = self.op.reboot_type
6349

    
6350
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6351
                                              instance.name,
6352
                                              instance.hypervisor)
6353
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6354
    instance_running = bool(remote_info.payload)
6355

    
6356
    node_current = instance.primary_node
6357

    
6358
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6359
                                            constants.INSTANCE_REBOOT_HARD]:
6360
      for disk in instance.disks:
6361
        self.cfg.SetDiskID(disk, node_current)
6362
      result = self.rpc.call_instance_reboot(node_current, instance,
6363
                                             reboot_type,
6364
                                             self.op.shutdown_timeout)
6365
      result.Raise("Could not reboot instance")
6366
    else:
6367
      if instance_running:
6368
        result = self.rpc.call_instance_shutdown(node_current, instance,
6369
                                                 self.op.shutdown_timeout)
6370
        result.Raise("Could not shutdown instance for full reboot")
6371
        _ShutdownInstanceDisks(self, instance)
6372
      else:
6373
        self.LogInfo("Instance %s was already stopped, starting now",
6374
                     instance.name)
6375
      _StartInstanceDisks(self, instance, ignore_secondaries)
6376
      result = self.rpc.call_instance_start(node_current,
6377
                                            (instance, None, None), False)
6378
      msg = result.fail_msg
6379
      if msg:
6380
        _ShutdownInstanceDisks(self, instance)
6381
        raise errors.OpExecError("Could not start instance for"
6382
                                 " full reboot: %s" % msg)
6383

    
6384
    self.cfg.MarkInstanceUp(instance.name)
6385

    
6386

    
6387
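
# Editor's illustrative sketch (not part of upstream Ganeti): LUInstanceReboot
# above chooses between two paths: a soft/hard reboot of a running instance is
# delegated to the hypervisor, while anything else (a stopped instance or a
# full reboot) becomes a shutdown/start cycle with the disks deactivated and
# reactivated in between.  The helper below only models that decision; its
# name and return values are hypothetical and it is not called by the
# surrounding code.
def _ExampleRebootPath(instance_running, reboot_type, hypervisor_reboot_types):
  """Return "hypervisor" for in-hypervisor reboots, "stop-and-start" otherwise.

  """
  if instance_running and reboot_type in hypervisor_reboot_types:
    return "hypervisor"
  return "stop-and-start"

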
class LUInstanceShutdown(LogicalUnit):
6388
  """Shutdown an instance.
6389

6390
  """
6391
  HPATH = "instance-stop"
6392
  HTYPE = constants.HTYPE_INSTANCE
6393
  REQ_BGL = False
6394

    
6395
  def ExpandNames(self):
6396
    self._ExpandAndLockInstance()
6397

    
6398
  def BuildHooksEnv(self):
6399
    """Build hooks env.
6400

6401
    This runs on master, primary and secondary nodes of the instance.
6402

6403
    """
6404
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6405
    env["TIMEOUT"] = self.op.timeout
6406
    return env
6407

    
6408
  def BuildHooksNodes(self):
6409
    """Build hooks nodes.
6410

6411
    """
6412
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6413
    return (nl, nl)
6414

    
6415
  def CheckPrereq(self):
6416
    """Check prerequisites.
6417

6418
    This checks that the instance is in the cluster.
6419

6420
    """
6421
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6422
    assert self.instance is not None, \
6423
      "Cannot retrieve locked instance %s" % self.op.instance_name
6424

    
6425
    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6426

    
6427
    self.primary_offline = \
6428
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6429

    
6430
    if self.primary_offline and self.op.ignore_offline_nodes:
6431
      self.proc.LogWarning("Ignoring offline primary node")
6432
    else:
6433
      _CheckNodeOnline(self, self.instance.primary_node)
6434

    
6435
  def Exec(self, feedback_fn):
6436
    """Shutdown the instance.
6437

6438
    """
6439
    instance = self.instance
6440
    node_current = instance.primary_node
6441
    timeout = self.op.timeout
6442

    
6443
    if not self.op.no_remember:
6444
      self.cfg.MarkInstanceDown(instance.name)
6445

    
6446
    if self.primary_offline:
6447
      assert self.op.ignore_offline_nodes
6448
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6449
    else:
6450
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6451
      msg = result.fail_msg
6452
      if msg:
6453
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6454

    
6455
      _ShutdownInstanceDisks(self, instance)
6456

    
6457

    
6458
class LUInstanceReinstall(LogicalUnit):
6459
  """Reinstall an instance.
6460

6461
  """
6462
  HPATH = "instance-reinstall"
6463
  HTYPE = constants.HTYPE_INSTANCE
6464
  REQ_BGL = False
6465

    
6466
  def ExpandNames(self):
6467
    self._ExpandAndLockInstance()
6468

    
6469
  def BuildHooksEnv(self):
6470
    """Build hooks env.
6471

6472
    This runs on master, primary and secondary nodes of the instance.
6473

6474
    """
6475
    return _BuildInstanceHookEnvByObject(self, self.instance)
6476

    
6477
  def BuildHooksNodes(self):
6478
    """Build hooks nodes.
6479

6480
    """
6481
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6482
    return (nl, nl)
6483

    
6484
  def CheckPrereq(self):
6485
    """Check prerequisites.
6486

6487
    This checks that the instance is in the cluster and is not running.
6488

6489
    """
6490
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6491
    assert instance is not None, \
6492
      "Cannot retrieve locked instance %s" % self.op.instance_name
6493
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6494
                     " offline, cannot reinstall")
6495
    for node in instance.secondary_nodes:
6496
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6497
                       " cannot reinstall")
6498

    
6499
    if instance.disk_template == constants.DT_DISKLESS:
6500
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6501
                                 self.op.instance_name,
6502
                                 errors.ECODE_INVAL)
6503
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6504

    
6505
    if self.op.os_type is not None:
6506
      # OS verification
6507
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6508
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6509
      instance_os = self.op.os_type
6510
    else:
6511
      instance_os = instance.os
6512

    
6513
    nodelist = list(instance.all_nodes)
6514

    
6515
    if self.op.osparams:
6516
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6517
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6518
      self.os_inst = i_osdict # the new dict (without defaults)
6519
    else:
6520
      self.os_inst = None
6521

    
6522
    self.instance = instance
6523

    
6524
  def Exec(self, feedback_fn):
6525
    """Reinstall the instance.
6526

6527
    """
6528
    inst = self.instance
6529

    
6530
    if self.op.os_type is not None:
6531
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6532
      inst.os = self.op.os_type
6533
      # Write to configuration
6534
      self.cfg.Update(inst, feedback_fn)
6535

    
6536
    _StartInstanceDisks(self, inst, None)
6537
    try:
6538
      feedback_fn("Running the instance OS create scripts...")
6539
      # FIXME: pass debug option from opcode to backend
6540
      result = self.rpc.call_instance_os_add(inst.primary_node,
6541
                                             (inst, self.os_inst), True,
6542
                                             self.op.debug_level)
6543
      result.Raise("Could not install OS for instance %s on node %s" %
6544
                   (inst.name, inst.primary_node))
6545
    finally:
6546
      _ShutdownInstanceDisks(self, inst)
6547

    
6548

    
6549
class LUInstanceRecreateDisks(LogicalUnit):
6550
  """Recreate an instance's missing disks.
6551

6552
  """
6553
  HPATH = "instance-recreate-disks"
6554
  HTYPE = constants.HTYPE_INSTANCE
6555
  REQ_BGL = False
6556

    
6557
  def CheckArguments(self):
6558
    # normalise the disk list
6559
    self.op.disks = sorted(frozenset(self.op.disks))
6560

    
6561
  def ExpandNames(self):
6562
    self._ExpandAndLockInstance()
6563
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6564
    if self.op.nodes:
6565
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6566
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6567
    else:
6568
      self.needed_locks[locking.LEVEL_NODE] = []
6569

    
6570
  def DeclareLocks(self, level):
6571
    if level == locking.LEVEL_NODE:
6572
      # if we replace the nodes, we only need to lock the old primary,
6573
      # otherwise we need to lock all nodes for disk re-creation
6574
      primary_only = bool(self.op.nodes)
6575
      self._LockInstancesNodes(primary_only=primary_only)
6576
    elif level == locking.LEVEL_NODE_RES:
6577
      # Copy node locks
6578
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6579
        self.needed_locks[locking.LEVEL_NODE][:]
6580

    
6581
  def BuildHooksEnv(self):
6582
    """Build hooks env.
6583

6584
    This runs on master, primary and secondary nodes of the instance.
6585

6586
    """
6587
    return _BuildInstanceHookEnvByObject(self, self.instance)
6588

    
6589
  def BuildHooksNodes(self):
6590
    """Build hooks nodes.
6591

6592
    """
6593
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6594
    return (nl, nl)
6595

    
6596
  def CheckPrereq(self):
6597
    """Check prerequisites.
6598

6599
    This checks that the instance is in the cluster and is not running.
6600

6601
    """
6602
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6603
    assert instance is not None, \
6604
      "Cannot retrieve locked instance %s" % self.op.instance_name
6605
    if self.op.nodes:
6606
      if len(self.op.nodes) != len(instance.all_nodes):
6607
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6608
                                   " %d replacement nodes were specified" %
6609
                                   (instance.name, len(instance.all_nodes),
6610
                                    len(self.op.nodes)),
6611
                                   errors.ECODE_INVAL)
6612
      assert instance.disk_template != constants.DT_DRBD8 or \
6613
          len(self.op.nodes) == 2
6614
      assert instance.disk_template != constants.DT_PLAIN or \
6615
          len(self.op.nodes) == 1
6616
      primary_node = self.op.nodes[0]
6617
    else:
6618
      primary_node = instance.primary_node
6619
    _CheckNodeOnline(self, primary_node)
6620

    
6621
    if instance.disk_template == constants.DT_DISKLESS:
6622
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6623
                                 self.op.instance_name, errors.ECODE_INVAL)
6624
    # if we replace nodes *and* the old primary is offline, we don't
6625
    # check
6626
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6627
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6628
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6629
    if not (self.op.nodes and old_pnode.offline):
6630
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6631
                          msg="cannot recreate disks")
6632

    
6633
    if not self.op.disks:
6634
      self.op.disks = range(len(instance.disks))
6635
    else:
6636
      for idx in self.op.disks:
6637
        if idx >= len(instance.disks):
6638
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6639
                                     errors.ECODE_INVAL)
6640
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6641
      raise errors.OpPrereqError("Can't recreate disks partially and"
6642
                                 " change the nodes at the same time",
6643
                                 errors.ECODE_INVAL)
6644
    self.instance = instance
6645

    
6646
  def Exec(self, feedback_fn):
6647
    """Recreate the disks.
6648

6649
    """
6650
    instance = self.instance
6651

    
6652
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6653
            self.owned_locks(locking.LEVEL_NODE_RES))
6654

    
6655
    to_skip = []
6656
    mods = [] # keeps track of needed logical_id changes
6657

    
6658
    for idx, disk in enumerate(instance.disks):
6659
      if idx not in self.op.disks: # disk idx has not been passed in
6660
        to_skip.append(idx)
6661
        continue
6662
      # update secondaries for disks, if needed
6663
      if self.op.nodes:
6664
        if disk.dev_type == constants.LD_DRBD8:
6665
          # need to update the nodes and minors
6666
          assert len(self.op.nodes) == 2
6667
          assert len(disk.logical_id) == 6 # otherwise disk internals
6668
                                           # have changed
6669
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6670
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6671
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6672
                    new_minors[0], new_minors[1], old_secret)
6673
          assert len(disk.logical_id) == len(new_id)
6674
          mods.append((idx, new_id))
6675

    
6676
    # now that we have passed all asserts above, we can apply the mods
6677
    # in a single run (to avoid partial changes)
6678
    for idx, new_id in mods:
6679
      instance.disks[idx].logical_id = new_id
6680

    
6681
    # change primary node, if needed
6682
    if self.op.nodes:
6683
      instance.primary_node = self.op.nodes[0]
6684
      self.LogWarning("Changing the instance's nodes, you will have to"
6685
                      " remove any disks left on the older nodes manually")
6686

    
6687
    if self.op.nodes:
6688
      self.cfg.Update(instance, feedback_fn)
6689

    
6690
    _CreateDisks(self, instance, to_skip=to_skip)
6691

    
6692

    
6693
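
# Editor's illustrative sketch (not part of upstream Ganeti): when
# LUInstanceRecreateDisks above is given replacement nodes, it rebuilds each
# DRBD disk's 6-tuple logical_id, keeping the old port and secret but
# substituting the new node pair and freshly allocated minors.  A standalone
# rendering of that tuple surgery, with hypothetical inputs, not used by the
# surrounding code:
def _ExampleRebuildDrbdLogicalId(old_logical_id, new_nodes, new_minors):
  """Return a new (nodeA, nodeB, port, minorA, minorB, secret) tuple.

  """
  assert len(old_logical_id) == 6
  assert len(new_nodes) == 2 and len(new_minors) == 2
  (_, _, old_port, _, _, old_secret) = old_logical_id
  return (new_nodes[0], new_nodes[1], old_port,
          new_minors[0], new_minors[1], old_secret)

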
class LUInstanceRename(LogicalUnit):
6694
  """Rename an instance.
6695

6696
  """
6697
  HPATH = "instance-rename"
6698
  HTYPE = constants.HTYPE_INSTANCE
6699

    
6700
  def CheckArguments(self):
6701
    """Check arguments.
6702

6703
    """
6704
    if self.op.ip_check and not self.op.name_check:
6705
      # TODO: make the ip check more flexible and not depend on the name check
6706
      raise errors.OpPrereqError("IP address check requires a name check",
6707
                                 errors.ECODE_INVAL)
6708

    
6709
  def BuildHooksEnv(self):
6710
    """Build hooks env.
6711

6712
    This runs on master, primary and secondary nodes of the instance.
6713

6714
    """
6715
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6716
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6717
    return env
6718

    
6719
  def BuildHooksNodes(self):
6720
    """Build hooks nodes.
6721

6722
    """
6723
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6724
    return (nl, nl)
6725

    
6726
  def CheckPrereq(self):
6727
    """Check prerequisites.
6728

6729
    This checks that the instance is in the cluster and is not running.
6730

6731
    """
6732
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6733
                                                self.op.instance_name)
6734
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6735
    assert instance is not None
6736
    _CheckNodeOnline(self, instance.primary_node)
6737
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6738
                        msg="cannot rename")
6739
    self.instance = instance
6740

    
6741
    new_name = self.op.new_name
6742
    if self.op.name_check:
6743
      hostname = netutils.GetHostname(name=new_name)
6744
      if hostname.name != new_name:
6745
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6746
                     hostname.name)
6747
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6748
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6749
                                    " same as given hostname '%s'") %
6750
                                    (hostname.name, self.op.new_name),
6751
                                    errors.ECODE_INVAL)
6752
      new_name = self.op.new_name = hostname.name
6753
      if (self.op.ip_check and
6754
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6755
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6756
                                   (hostname.ip, new_name),
6757
                                   errors.ECODE_NOTUNIQUE)
6758

    
6759
    instance_list = self.cfg.GetInstanceList()
6760
    if new_name in instance_list and new_name != instance.name:
6761
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6762
                                 new_name, errors.ECODE_EXISTS)
6763

    
6764
  def Exec(self, feedback_fn):
6765
    """Rename the instance.
6766

6767
    """
6768
    inst = self.instance
6769
    old_name = inst.name
6770

    
6771
    rename_file_storage = False
6772
    if (inst.disk_template in constants.DTS_FILEBASED and
6773
        self.op.new_name != inst.name):
6774
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6775
      rename_file_storage = True
6776

    
6777
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6778
    # Change the instance lock. This is definitely safe while we hold the BGL.
6779
    # Otherwise the new lock would have to be added in acquired mode.
6780
    assert self.REQ_BGL
6781
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6782
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6783

    
6784
    # re-read the instance from the configuration after rename
6785
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6786

    
6787
    if rename_file_storage:
6788
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6789
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6790
                                                     old_file_storage_dir,
6791
                                                     new_file_storage_dir)
6792
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6793
                   " (but the instance has been renamed in Ganeti)" %
6794
                   (inst.primary_node, old_file_storage_dir,
6795
                    new_file_storage_dir))
6796

    
6797
    _StartInstanceDisks(self, inst, None)
6798
    try:
6799
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6800
                                                 old_name, self.op.debug_level)
6801
      msg = result.fail_msg
6802
      if msg:
6803
        msg = ("Could not run OS rename script for instance %s on node %s"
6804
               " (but the instance has been renamed in Ganeti): %s" %
6805
               (inst.name, inst.primary_node, msg))
6806
        self.proc.LogWarning(msg)
6807
    finally:
6808
      _ShutdownInstanceDisks(self, inst)
6809

    
6810
    return inst.name
6811

    
6812

    
6813
class LUInstanceRemove(LogicalUnit):
6814
  """Remove an instance.
6815

6816
  """
6817
  HPATH = "instance-remove"
6818
  HTYPE = constants.HTYPE_INSTANCE
6819
  REQ_BGL = False
6820

    
6821
  def ExpandNames(self):
6822
    self._ExpandAndLockInstance()
6823
    self.needed_locks[locking.LEVEL_NODE] = []
6824
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6825
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6826

    
6827
  def DeclareLocks(self, level):
6828
    if level == locking.LEVEL_NODE:
6829
      self._LockInstancesNodes()
6830
    elif level == locking.LEVEL_NODE_RES:
6831
      # Copy node locks
6832
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6833
        self.needed_locks[locking.LEVEL_NODE][:]
6834

    
6835
  def BuildHooksEnv(self):
6836
    """Build hooks env.
6837

6838
    This runs on master, primary and secondary nodes of the instance.
6839

6840
    """
6841
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6842
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6843
    return env
6844

    
6845
  def BuildHooksNodes(self):
6846
    """Build hooks nodes.
6847

6848
    """
6849
    nl = [self.cfg.GetMasterNode()]
6850
    nl_post = list(self.instance.all_nodes) + nl
6851
    return (nl, nl_post)
6852

    
6853
  def CheckPrereq(self):
6854
    """Check prerequisites.
6855

6856
    This checks that the instance is in the cluster.
6857

6858
    """
6859
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6860
    assert self.instance is not None, \
6861
      "Cannot retrieve locked instance %s" % self.op.instance_name
6862

    
6863
  def Exec(self, feedback_fn):
6864
    """Remove the instance.
6865

6866
    """
6867
    instance = self.instance
6868
    logging.info("Shutting down instance %s on node %s",
6869
                 instance.name, instance.primary_node)
6870

    
6871
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6872
                                             self.op.shutdown_timeout)
6873
    msg = result.fail_msg
6874
    if msg:
6875
      if self.op.ignore_failures:
6876
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6877
      else:
6878
        raise errors.OpExecError("Could not shutdown instance %s on"
6879
                                 " node %s: %s" %
6880
                                 (instance.name, instance.primary_node, msg))
6881

    
6882
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6883
            self.owned_locks(locking.LEVEL_NODE_RES))
6884
    assert not (set(instance.all_nodes) -
6885
                self.owned_locks(locking.LEVEL_NODE)), \
6886
      "Not owning correct locks"
6887

    
6888
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6889

    
6890

    
6891
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6892
  """Utility function to remove an instance.
6893

6894
  """
6895
  logging.info("Removing block devices for instance %s", instance.name)
6896

    
6897
  if not _RemoveDisks(lu, instance):
6898
    if not ignore_failures:
6899
      raise errors.OpExecError("Can't remove instance's disks")
6900
    feedback_fn("Warning: can't remove instance's disks")
6901

    
6902
  logging.info("Removing instance %s out of cluster config", instance.name)
6903

    
6904
  lu.cfg.RemoveInstance(instance.name)
6905

    
6906
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6907
    "Instance lock removal conflict"
6908

    
6909
  # Remove lock for the instance
6910
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6911

    
6912

    
6913
class LUInstanceQuery(NoHooksLU):
6914
  """Logical unit for querying instances.
6915

6916
  """
6917
  # pylint: disable=W0142
6918
  REQ_BGL = False
6919

    
6920
  def CheckArguments(self):
6921
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6922
                             self.op.output_fields, self.op.use_locking)
6923

    
6924
  def ExpandNames(self):
6925
    self.iq.ExpandNames(self)
6926

    
6927
  def DeclareLocks(self, level):
6928
    self.iq.DeclareLocks(self, level)
6929

    
6930
  def Exec(self, feedback_fn):
6931
    return self.iq.OldStyleQuery(self)
6932

    
6933

    
6934
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs to migrate instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]
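    # Each primary instance of the node gets its own single-opcode job (see
    # the comprehension above), so per-instance failures stay isolated and the
    # job queue can schedule the jobs independently.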

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we clean up from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and
        not instance.admin_state == constants.ADMINST_UP and
        not self.failover and self.fallback):
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
                      " switching to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node))

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the target node
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MAXMEM],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False
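    # Summary of the precedence implemented above: an explicit 'live' flag is
    # mapped onto 'mode'; otherwise an explicit 'mode' is used as given;
    # otherwise the hypervisor's HV_MIGRATION_MODE default applies. A failover
    # is always treated as non-live.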

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.lu.op.iallocator,
                 utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
        instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, [self.instance.hypervisor])
    for ninfo in nodeinfo.values():
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
                  ninfo.node)
    (_, _, (src_info, )) = nodeinfo[source_node].payload
    (_, _, (dst_info, )) = nodeinfo[target_node].payload

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
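        # e.g. 1536 MiB transferred out of 2048 MiB total reports 75.00 %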
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _ComputeLDParams(disk_template, disk_params):
  """Computes Logical Disk parameters from Disk Template parameters.

  @type disk_template: string
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
  @type disk_params: dict
  @param disk_params: disk template parameters; dict(template_name -> parameters)
  @rtype: list(dict)
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
    contains the LD parameters of the node. The tree is flattened in-order.

  """
  if disk_template not in constants.DISK_TEMPLATES:
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)

  result = list()
  dt_params = disk_params[disk_template]
  if disk_template == constants.DT_DRBD8:
    drbd_params = {
      constants.RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE],
      constants.BARRIERS: dt_params[constants.DRBD_DISK_BARRIERS],
      constants.NO_META_FLUSH: dt_params[constants.DRBD_META_BARRIERS],
      }

    drbd_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8],
                       drbd_params)

    result.append(drbd_params)

    # data LV
    data_params = {
      constants.STRIPES: dt_params[constants.DRBD_DATA_STRIPES],
      }
    data_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       data_params)
    result.append(data_params)

    # metadata LV
    meta_params = {
      constants.STRIPES: dt_params[constants.DRBD_META_STRIPES],
      }
    meta_params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       meta_params)
    result.append(meta_params)

  elif (disk_template == constants.DT_FILE or
        disk_template == constants.DT_SHARED_FILE):
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])

  elif disk_template == constants.DT_PLAIN:
    params = {
      constants.STRIPES: dt_params[constants.LV_STRIPES],
      }
    params = \
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_LV],
                       params)
    result.append(params)

  elif disk_template == constants.DT_BLOCK:
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])

  return result


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor, drbd_params, data_params,
                         meta_params):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]),
                          params=data_params)
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]),
                          params=meta_params)
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name, params=drbd_params)
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn, disk_params):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  ld_params = _ComputeLDParams(template_name, disk_params)
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    drbd_params, data_params, meta_params = ld_params
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1],
                                      drbd_params, data_params, meta_params)
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE],
                              params=ld_params[0])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
8283
  avg_time = time_taken / float(written)
8284
  return (total_size - written) * avg_time
8285

    
8286

    
8287
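# Note on the wipe loop below: the chunk size is MIN_WIPE_CHUNK_PERCENT
# percent of the disk, capped at MAX_WIPE_CHUNK (both defined in
# constants.py), so small disks are wiped in proportionally small chunks
# while very large disks never exceed the fixed cap; progress and an ETA via
# _CalcEta are reported roughly once a minute.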
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)


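# Note on the helper below: for file-based templates the containing
# directory is created only on the chosen primary node, while
# _CreateBlockDev is called for every node of the instance with C{f_create}
# set only on the primary (the same flag is passed twice, presumably as the
# force-create and force-open arguments of _CreateBlockDev).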
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


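# The removal helper below is deliberately best-effort: individual RPC
# failures are logged and folded into the boolean result instead of aborting,
# so callers such as the failed-creation rollback in LUInstanceCreate.Exec
# can still clean up whatever devices remain.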
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


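# Illustrative example (hypothetical values) for _ComputeDiskSizePerVG below:
# two DRBD disks of 10240 MiB and 2048 MiB, both in a VG named "xenvg", yield
# {"xenvg": 10240 + 128 + 2048 + 128} == {"xenvg": 12544}, i.e. each disk
# contributes its size plus DRBD_META_SIZE of metadata to its volume group.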
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


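# Illustrative example (hypothetical values) for _ComputeDiskSize below: the
# same two DRBD disks of 10240 MiB and 2048 MiB give a single total of
# 10240 + 128 + 2048 + 128 == 12544 MiB, DT_PLAIN would simply be
# 10240 + 2048 == 12288 MiB, and the file-based templates report no LVM
# space requirement.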
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


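# The check below merges the cluster-level hypervisor defaults with the
# per-instance overrides (objects.FillDict) and then asks every vm-capable
# node to validate the resulting dict; offline nodes are skipped rather than
# treated as validation failures.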
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


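# Rough sketch (assumptions noted) of how the LU below is typically driven:
# a client submits an opcodes.OpInstanceCreate carrying at least
# instance_name, disk_template, disks, nics, os_type and either pnode or
# iallocator; the processor then runs CheckArguments, ExpandNames,
# CheckPrereq and Exec in that order.  The exact opcode field set is defined
# in opcodes.py, not here.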
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

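  # The allocator request built below mirrors the instance specification
  # (disks, nics, memory, vcpus, hypervisor); on success ial.result is the
  # list of chosen node names, the first entry becoming the primary node and,
  # for two-node (mirrored) templates, the second one the secondary.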
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MAXMEM],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      minmem=self.be_full[constants.BE_MINMEM],
      maxmem=self.be_full[constants.BE_MAXMEM],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
        # Compatibility for the old "memory" be param
        if name == constants.BE_MEMORY:
          if constants.BE_MAXMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MAXMEM] = value
          if constants.BE_MINMEM not in self.op.beparams:
            self.op.beparams[constants.BE_MINMEM] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    objects.UpgradeBeParams(self.op.beparams)
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

      snode = self.cfg.GetNodeInfo(self.op.snode)
      if pnode.group != snode.group:
        self.LogWarning("The primary and secondary nodes are in two"
                        " different node groups; the disk parameters"
                        " from the first disk's node group will be"
                        " used")

    nodenames = [pnode.name] + self.secondaries

    # disk parameters (not customizable at instance or node level)
    # just use the primary node parameters, ignoring the secondary.
    self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    #TODO(dynmem): use MINMEM for checking
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MAXMEM],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

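  # Exec below is, roughly: generate the disk objects, build the Instance
  # object, create (or adopt) the disks, add the instance to the cluster
  # configuration, optionally wipe and sync the disks, and finally run the
  # OS create/import scripts depending on self.op.mode.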
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn,
                                  self.diskparams)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
9462
    else:
9463
      feedback_fn("* creating instance disks...")
9464
      try:
9465
        _CreateDisks(self, iobj)
9466
      except errors.OpExecError:
9467
        self.LogWarning("Device creation failed, reverting...")
9468
        try:
9469
          _RemoveDisks(self, iobj)
9470
        finally:
9471
          self.cfg.ReleaseDRBDMinors(instance)
9472
          raise
9473

    
9474
    feedback_fn("adding instance %s to cluster config" % instance)
9475

    
9476
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9477

    
9478
    # Declare that we don't want to remove the instance lock anymore, as we've
9479
    # added the instance to the config
9480
    del self.remove_locks[locking.LEVEL_INSTANCE]
9481

    
9482
    if self.op.mode == constants.INSTANCE_IMPORT:
9483
      # Release unused nodes
9484
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9485
    else:
9486
      # Release all nodes
9487
      _ReleaseLocks(self, locking.LEVEL_NODE)
9488

    
9489
    disk_abort = False
9490
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9491
      feedback_fn("* wiping instance disks...")
9492
      try:
9493
        _WipeDisks(self, iobj)
9494
      except errors.OpExecError, err:
9495
        logging.exception("Wiping disks failed")
9496
        self.LogWarning("Wiping instance disks failed (%s)", err)
9497
        disk_abort = True
9498

    
9499
    if disk_abort:
9500
      # Something is already wrong with the disks, don't do anything else
9501
      pass
9502
    elif self.op.wait_for_sync:
9503
      disk_abort = not _WaitForSync(self, iobj)
9504
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9505
      # make sure the disks are not degraded (still sync-ing is ok)
9506
      feedback_fn("* checking mirrors status")
9507
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9508
    else:
9509
      disk_abort = False
9510

    
9511
    if disk_abort:
9512
      _RemoveDisks(self, iobj)
9513
      self.cfg.RemoveInstance(iobj.name)
9514
      # Make sure the instance lock gets removed
9515
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9516
      raise errors.OpExecError("There are some degraded disks for"
9517
                               " this instance")
9518

    
9519
    # Release all node resource locks
9520
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9521

    
9522
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9523
      if self.op.mode == constants.INSTANCE_CREATE:
9524
        if not self.op.no_install:
9525
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9526
                        not self.op.wait_for_sync)
9527
          if pause_sync:
9528
            feedback_fn("* pausing disk sync to install instance OS")
9529
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9530
                                                              iobj.disks, True)
9531
            for idx, success in enumerate(result.payload):
9532
              if not success:
9533
                logging.warn("pause-sync of instance %s for disk %d failed",
9534
                             instance, idx)
9535

    
9536
          feedback_fn("* running the instance OS create scripts...")
9537
          # FIXME: pass debug option from opcode to backend
9538
          os_add_result = \
9539
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9540
                                          self.op.debug_level)
9541
          if pause_sync:
9542
            feedback_fn("* resuming disk sync")
9543
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9544
                                                              iobj.disks, False)
9545
            for idx, success in enumerate(result.payload):
9546
              if not success:
9547
                logging.warn("resume-sync of instance %s for disk %d failed",
9548
                             instance, idx)
9549

    
9550
          os_add_result.Raise("Could not add os for instance %s"
9551
                              " on node %s" % (instance, pnode_name))
9552

    
9553
      elif self.op.mode == constants.INSTANCE_IMPORT:
9554
        feedback_fn("* running the instance OS import scripts...")
9555

    
9556
        transfers = []
9557

    
9558
        for idx, image in enumerate(self.src_images):
9559
          if not image:
9560
            continue
9561

    
9562
          # FIXME: pass debug option from opcode to backend
9563
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9564
                                             constants.IEIO_FILE, (image, ),
9565
                                             constants.IEIO_SCRIPT,
9566
                                             (iobj.disks[idx], idx),
9567
                                             None)
9568
          transfers.append(dt)
9569

    
9570
        import_result = \
9571
          masterd.instance.TransferInstanceData(self, feedback_fn,
9572
                                                self.op.src_node, pnode_name,
9573
                                                self.pnode.secondary_ip,
9574
                                                iobj, transfers)
9575
        if not compat.all(import_result):
9576
          self.LogWarning("Some disks for instance %s on node %s were not"
9577
                          " imported successfully" % (instance, pnode_name))
9578

    
9579
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
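        # Worked example (illustrative numbers only): if
        # constants.RIE_CONNECT_TIMEOUT were 60 seconds and the opcode asked
        # for a 120 second shutdown timeout, the source cluster would get
        #
        #   connect_timeout = 60 + 120      # => 180 seconds
        #
        # to stop the instance and establish the connection.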
9587

    
9588
        assert iobj.primary_node == self.pnode.name
9589
        disk_results = \
9590
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9591
                                        self.source_x509_ca,
9592
                                        self._cds, timeouts)
9593
        if not compat.all(disk_results):
9594
          # TODO: Should the instance still be started, even if some disks
9595
          # failed to import (valid for local imports, too)?
9596
          self.LogWarning("Some disks for instance %s on node %s were not"
9597
                          " imported successfully" % (instance, pnode_name))
9598

    
9599
        # Run rename script on newly imported instance
9600
        assert iobj.name == instance
9601
        feedback_fn("Running rename script for %s" % instance)
9602
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9603
                                                   self.source_instance_name,
9604
                                                   self.op.debug_level)
9605
        if result.fail_msg:
9606
          self.LogWarning("Failed to run rename script for %s on node"
9607
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9608

    
9609
      else:
9610
        # also checked in the prereq part
9611
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9612
                                     % self.op.mode)
9613

    
9614
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9615

    
9616
    if self.op.start:
9617
      iobj.admin_state = constants.ADMINST_UP
9618
      self.cfg.Update(iobj, feedback_fn)
9619
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9620
      feedback_fn("* starting instance...")
9621
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9622
                                            False)
9623
      result.Raise("Could not start instance")
9624

    
9625
    return list(iobj.all_nodes)
9626

    
9627

    
9628
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False
9637

    
9638
  def ExpandNames(self):
9639
    self.share_locks = _ShareAll()
9640
    self._ExpandAndLockInstance()
9641

    
9642
  def CheckPrereq(self):
9643
    """Check prerequisites.
9644

9645
    This checks that the instance is in the cluster.
9646

9647
    """
9648
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9649
    assert self.instance is not None, \
9650
      "Cannot retrieve locked instance %s" % self.op.instance_name
9651
    _CheckNodeOnline(self, self.instance.primary_node)
9652

    
9653
  def Exec(self, feedback_fn):
9654
    """Connect to the console of an instance
9655

9656
    """
9657
    instance = self.instance
9658
    node = instance.primary_node
9659

    
9660
    node_insts = self.rpc.call_instance_list([node],
9661
                                             [instance.hypervisor])[node]
9662
    node_insts.Raise("Can't get node information from %s" % node)
9663

    
9664
    if instance.name not in node_insts.payload:
9665
      if instance.admin_state == constants.ADMINST_UP:
9666
        state = constants.INSTST_ERRORDOWN
9667
      elif instance.admin_state == constants.ADMINST_DOWN:
9668
        state = constants.INSTST_ADMINDOWN
9669
      else:
9670
        state = constants.INSTST_ADMINOFFLINE
9671
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9672
                               (instance.name, state))
9673

    
9674
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9675

    
9676
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9677

    
9678

    
9679
def _GetInstanceConsole(cluster, instance):
9680
  """Returns console information for an instance.
9681

9682
  @type cluster: L{objects.Cluster}
9683
  @type instance: L{objects.Instance}
9684
  @rtype: dict
9685

9686
  """
9687
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9688
  # beparams and hvparams are passed separately, to avoid editing the
9689
  # instance and then saving the defaults in the instance itself.
9690
  hvparams = cluster.FillHV(instance)
9691
  beparams = cluster.FillBE(instance)
9692
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9693

    
9694
  assert console.instance == instance.name
9695
  assert console.Validate()
9696

    
9697
  return console.ToDict()
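# Usage sketch (hypothetical caller, for illustration only): from within a
# logical unit the helper is used much like LUInstanceConsole.Exec does:
#
#   cluster = self.cfg.GetClusterInfo()
#   instance = self.cfg.GetInstanceInfo("instance1.example.com")
#   console = _GetInstanceConsole(cluster, instance)
#   # 'console' is the serialized objects.InstanceConsole, ready to be
#   # returned to the client, which runs the contained command on the
#   # master node.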
9698

    
9699

    
9700
class LUInstanceReplaceDisks(LogicalUnit):
9701
  """Replace the disks of an instance.
9702

9703
  """
9704
  HPATH = "mirrors-replace"
9705
  HTYPE = constants.HTYPE_INSTANCE
9706
  REQ_BGL = False
9707

    
9708
  def CheckArguments(self):
9709
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9710
                                  self.op.iallocator)
9711

    
9712
  def ExpandNames(self):
9713
    self._ExpandAndLockInstance()
9714

    
9715
    assert locking.LEVEL_NODE not in self.needed_locks
9716
    assert locking.LEVEL_NODE_RES not in self.needed_locks
9717
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9718

    
9719
    assert self.op.iallocator is None or self.op.remote_node is None, \
9720
      "Conflicting options"
9721

    
9722
    if self.op.remote_node is not None:
9723
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9724

    
9725
      # Warning: do not remove the locking of the new secondary here
9726
      # unless DRBD8.AddChildren is changed to work in parallel;
9727
      # currently it doesn't since parallel invocations of
9728
      # FindUnusedMinor will conflict
9729
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9730
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9731
    else:
9732
      self.needed_locks[locking.LEVEL_NODE] = []
9733
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9734

    
9735
      if self.op.iallocator is not None:
9736
        # iallocator will select a new node in the same group
9737
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9738

    
9739
    self.needed_locks[locking.LEVEL_NODE_RES] = []
9740

    
9741
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9742
                                   self.op.iallocator, self.op.remote_node,
9743
                                   self.op.disks, False, self.op.early_release)
9744

    
9745
    self.tasklets = [self.replacer]
9746

    
9747
  def DeclareLocks(self, level):
9748
    if level == locking.LEVEL_NODEGROUP:
9749
      assert self.op.remote_node is None
9750
      assert self.op.iallocator is not None
9751
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9752

    
9753
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9754
      # Lock all groups used by instance optimistically; this requires going
9755
      # via the node before it's locked, requiring verification later on
9756
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9757
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9758

    
9759
    elif level == locking.LEVEL_NODE:
9760
      if self.op.iallocator is not None:
9761
        assert self.op.remote_node is None
9762
        assert not self.needed_locks[locking.LEVEL_NODE]
9763

    
9764
        # Lock member nodes of all locked groups
9765
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9766
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9767
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9768
      else:
9769
        self._LockInstancesNodes()
9770
    elif level == locking.LEVEL_NODE_RES:
9771
      # Reuse node locks
9772
      self.needed_locks[locking.LEVEL_NODE_RES] = \
9773
        self.needed_locks[locking.LEVEL_NODE]
9774

    
9775
  def BuildHooksEnv(self):
9776
    """Build hooks env.
9777

9778
    This runs on the master, the primary and all the secondaries.
9779

9780
    """
9781
    instance = self.replacer.instance
9782
    env = {
9783
      "MODE": self.op.mode,
9784
      "NEW_SECONDARY": self.op.remote_node,
9785
      "OLD_SECONDARY": instance.secondary_nodes[0],
9786
      }
9787
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9788
    return env
9789

    
9790
  def BuildHooksNodes(self):
9791
    """Build hooks nodes.
9792

9793
    """
9794
    instance = self.replacer.instance
9795
    nl = [
9796
      self.cfg.GetMasterNode(),
9797
      instance.primary_node,
9798
      ]
9799
    if self.op.remote_node is not None:
9800
      nl.append(self.op.remote_node)
9801
    return nl, nl
9802

    
9803
  def CheckPrereq(self):
9804
    """Check prerequisites.
9805

9806
    """
9807
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9808
            self.op.iallocator is None)
9809

    
9810
    # Verify if node group locks are still correct
9811
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9812
    if owned_groups:
9813
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9814

    
9815
    return LogicalUnit.CheckPrereq(self)
9816

    
9817

    
9818
class TLReplaceDisks(Tasklet):
9819
  """Replaces disks for an instance.
9820

9821
  Note: Locking is not within the scope of this class.
9822

9823
  """
9824
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9825
               disks, delay_iallocator, early_release):
9826
    """Initializes this class.
9827

9828
    """
9829
    Tasklet.__init__(self, lu)
9830

    
9831
    # Parameters
9832
    self.instance_name = instance_name
9833
    self.mode = mode
9834
    self.iallocator_name = iallocator_name
9835
    self.remote_node = remote_node
9836
    self.disks = disks
9837
    self.delay_iallocator = delay_iallocator
9838
    self.early_release = early_release
9839

    
9840
    # Runtime data
9841
    self.instance = None
9842
    self.new_node = None
9843
    self.target_node = None
9844
    self.other_node = None
9845
    self.remote_node_info = None
9846
    self.node_secondary_ip = None
9847

    
9848
  @staticmethod
9849
  def CheckArguments(mode, remote_node, iallocator):
9850
    """Helper function for users of this class.
9851

9852
    """
9853
    # check for valid parameter combination
9854
    if mode == constants.REPLACE_DISK_CHG:
9855
      if remote_node is None and iallocator is None:
9856
        raise errors.OpPrereqError("When changing the secondary either an"
9857
                                   " iallocator script must be used or the"
9858
                                   " new node given", errors.ECODE_INVAL)
9859

    
9860
      if remote_node is not None and iallocator is not None:
9861
        raise errors.OpPrereqError("Give either the iallocator or the new"
9862
                                   " secondary, not both", errors.ECODE_INVAL)
9863

    
9864
    elif remote_node is not None or iallocator is not None:
9865
      # Not replacing the secondary
9866
      raise errors.OpPrereqError("The iallocator and new node options can"
9867
                                 " only be used when changing the"
9868
                                 " secondary node", errors.ECODE_INVAL)
9869

    
9870
  @staticmethod
9871
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9872
    """Compute a new secondary node using an IAllocator.
9873

9874
    """
9875
    ial = IAllocator(lu.cfg, lu.rpc,
9876
                     mode=constants.IALLOCATOR_MODE_RELOC,
9877
                     name=instance_name,
9878
                     relocate_from=list(relocate_from))
9879

    
9880
    ial.Run(iallocator_name)
9881

    
9882
    if not ial.success:
9883
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9884
                                 " %s" % (iallocator_name, ial.info),
9885
                                 errors.ECODE_NORES)
9886

    
9887
    if len(ial.result) != ial.required_nodes:
9888
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9889
                                 " of nodes (%s), required %s" %
9890
                                 (iallocator_name,
9891
                                  len(ial.result), ial.required_nodes),
9892
                                 errors.ECODE_FAULT)
9893

    
9894
    remote_node_name = ial.result[0]
9895

    
9896
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9897
               instance_name, remote_node_name)
9898

    
9899
    return remote_node_name
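  # Illustrative call (hypothetical instance and node names; "hail" is just
  # an example iallocator script):
  #
  #   new_secondary = TLReplaceDisks._RunAllocator(
  #     lu, "hail", "instance1.example.com", ["node2.example.com"])
  #
  # The returned node name later becomes self.new_node in _CheckPrereq2.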
9900

    
9901
  def _FindFaultyDisks(self, node_name):
9902
    """Wrapper for L{_FindFaultyInstanceDisks}.
9903

9904
    """
9905
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9906
                                    node_name, True)
9907

    
9908
  def _CheckDisksActivated(self, instance):
9909
    """Checks if the instance disks are activated.
9910

9911
    @param instance: The instance to check disks
9912
    @return: True if they are activated, False otherwise
9913

9914
    """
9915
    nodes = instance.all_nodes
9916

    
9917
    for idx, dev in enumerate(instance.disks):
9918
      for node in nodes:
9919
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9920
        self.cfg.SetDiskID(dev, node)
9921

    
9922
        result = self.rpc.call_blockdev_find(node, dev)
9923

    
9924
        if result.offline:
9925
          continue
9926
        elif result.fail_msg or not result.payload:
9927
          return False
9928

    
9929
    return True
9930

    
9931
  def CheckPrereq(self):
9932
    """Check prerequisites.
9933

9934
    This checks that the instance is in the cluster.
9935

9936
    """
9937
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9938
    assert instance is not None, \
9939
      "Cannot retrieve locked instance %s" % self.instance_name
9940

    
9941
    if instance.disk_template != constants.DT_DRBD8:
9942
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9943
                                 " instances", errors.ECODE_INVAL)
9944

    
9945
    if len(instance.secondary_nodes) != 1:
9946
      raise errors.OpPrereqError("The instance has a strange layout,"
9947
                                 " expected one secondary but found %d" %
9948
                                 len(instance.secondary_nodes),
9949
                                 errors.ECODE_FAULT)
9950

    
9951
    if not self.delay_iallocator:
9952
      self._CheckPrereq2()
9953

    
9954
  def _CheckPrereq2(self):
9955
    """Check prerequisites, second part.
9956

9957
    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because, during node evacuation, the iallocator
    would otherwise only be called with an unmodified cluster model, not
    taking planned changes into account.
9961

9962
    """
9963
    instance = self.instance
9964
    secondary_node = instance.secondary_nodes[0]
9965

    
9966
    if self.iallocator_name is None:
9967
      remote_node = self.remote_node
9968
    else:
9969
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9970
                                       instance.name, instance.secondary_nodes)
9971

    
9972
    if remote_node is None:
9973
      self.remote_node_info = None
9974
    else:
9975
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9976
             "Remote node '%s' is not locked" % remote_node
9977

    
9978
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9979
      assert self.remote_node_info is not None, \
9980
        "Cannot retrieve locked node %s" % remote_node
9981

    
9982
    if remote_node == self.instance.primary_node:
9983
      raise errors.OpPrereqError("The specified node is the primary node of"
9984
                                 " the instance", errors.ECODE_INVAL)
9985

    
9986
    if remote_node == secondary_node:
9987
      raise errors.OpPrereqError("The specified node is already the"
9988
                                 " secondary node of the instance",
9989
                                 errors.ECODE_INVAL)
9990

    
9991
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9992
                                    constants.REPLACE_DISK_CHG):
9993
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9994
                                 errors.ECODE_INVAL)
9995

    
9996
    if self.mode == constants.REPLACE_DISK_AUTO:
9997
      if not self._CheckDisksActivated(instance):
9998
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9999
                                   " first" % self.instance_name,
10000
                                   errors.ECODE_STATE)
10001
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
10002
      faulty_secondary = self._FindFaultyDisks(secondary_node)
10003

    
10004
      if faulty_primary and faulty_secondary:
10005
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
10006
                                   " one node and can not be repaired"
10007
                                   " automatically" % self.instance_name,
10008
                                   errors.ECODE_STATE)
10009

    
10010
      if faulty_primary:
10011
        self.disks = faulty_primary
10012
        self.target_node = instance.primary_node
10013
        self.other_node = secondary_node
10014
        check_nodes = [self.target_node, self.other_node]
10015
      elif faulty_secondary:
10016
        self.disks = faulty_secondary
10017
        self.target_node = secondary_node
10018
        self.other_node = instance.primary_node
10019
        check_nodes = [self.target_node, self.other_node]
10020
      else:
10021
        self.disks = []
10022
        check_nodes = []
10023

    
10024
    else:
10025
      # Non-automatic modes
10026
      if self.mode == constants.REPLACE_DISK_PRI:
10027
        self.target_node = instance.primary_node
10028
        self.other_node = secondary_node
10029
        check_nodes = [self.target_node, self.other_node]
10030

    
10031
      elif self.mode == constants.REPLACE_DISK_SEC:
10032
        self.target_node = secondary_node
10033
        self.other_node = instance.primary_node
10034
        check_nodes = [self.target_node, self.other_node]
10035

    
10036
      elif self.mode == constants.REPLACE_DISK_CHG:
10037
        self.new_node = remote_node
10038
        self.other_node = instance.primary_node
10039
        self.target_node = secondary_node
10040
        check_nodes = [self.new_node, self.other_node]
10041

    
10042
        _CheckNodeNotDrained(self.lu, remote_node)
10043
        _CheckNodeVmCapable(self.lu, remote_node)
10044

    
10045
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
10046
        assert old_node_info is not None
10047
        if old_node_info.offline and not self.early_release:
10048
          # doesn't make sense to delay the release
10049
          self.early_release = True
10050
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10051
                          " early-release mode", secondary_node)
10052

    
10053
      else:
10054
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10055
                                     self.mode)
10056

    
10057
      # If not specified all disks should be replaced
10058
      if not self.disks:
10059
        self.disks = range(len(self.instance.disks))
10060

    
10061
    # TODO: compute disk parameters
10062
    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10063
    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10064
    if primary_node_info.group != secondary_node_info.group:
10065
      self.lu.LogInfo("The instance primary and secondary nodes are in two"
10066
                      " different node groups; the disk parameters of the"
10067
                      " primary node's group will be applied.")
10068

    
10069
    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10070

    
10071
    for node in check_nodes:
10072
      _CheckNodeOnline(self.lu, node)
10073

    
10074
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
10075
                                                          self.other_node,
10076
                                                          self.target_node]
10077
                              if node_name is not None)
10078

    
10079
    # Release unneeded node and node resource locks
10080
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10081
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10082

    
10083
    # Release any owned node group
10084
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10085
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10086

    
10087
    # Check whether disks are valid
10088
    for disk_idx in self.disks:
10089
      instance.FindDisk(disk_idx)
10090

    
10091
    # Get secondary node IP addresses
10092
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10093
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
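    # e.g. (illustrative addresses):
    #   self.node_secondary_ip == {"node1.example.com": "192.0.2.1",
    #                              "node3.example.com": "192.0.2.3"}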
10094

    
10095
  def Exec(self, feedback_fn):
10096
    """Execute disk replacement.
10097

10098
    This dispatches the disk replacement to the appropriate handler.
10099

10100
    """
10101
    if self.delay_iallocator:
10102
      self._CheckPrereq2()
10103

    
10104
    if __debug__:
10105
      # Verify owned locks before starting operation
10106
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10107
      assert set(owned_nodes) == set(self.node_secondary_ip), \
10108
          ("Incorrect node locks, owning %s, expected %s" %
10109
           (owned_nodes, self.node_secondary_ip.keys()))
10110
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10111
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
10112

    
10113
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10114
      assert list(owned_instances) == [self.instance_name], \
10115
          "Instance '%s' not locked" % self.instance_name
10116

    
10117
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10118
          "Should not own any node group lock at this point"
10119

    
10120
    if not self.disks:
10121
      feedback_fn("No disks need replacement")
10122
      return
10123

    
10124
    feedback_fn("Replacing disk(s) %s for %s" %
10125
                (utils.CommaJoin(self.disks), self.instance.name))
10126

    
10127
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10128

    
10129
    # Activate the instance disks if we're replacing them on a down instance
10130
    if activate_disks:
10131
      _StartInstanceDisks(self.lu, self.instance, True)
10132

    
10133
    try:
10134
      # Should we replace the secondary node?
10135
      if self.new_node is not None:
10136
        fn = self._ExecDrbd8Secondary
10137
      else:
10138
        fn = self._ExecDrbd8DiskOnly
10139

    
10140
      result = fn(feedback_fn)
10141
    finally:
10142
      # Deactivate the instance disks if we're replacing them on a
10143
      # down instance
10144
      if activate_disks:
10145
        _SafeShutdownInstanceDisks(self.lu, self.instance)
10146

    
10147
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
10148

    
10149
    if __debug__:
10150
      # Verify owned locks
10151
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10152
      nodes = frozenset(self.node_secondary_ip)
10153
      assert ((self.early_release and not owned_nodes) or
10154
              (not self.early_release and not (set(owned_nodes) - nodes))), \
10155
        ("Not owning the correct locks, early_release=%s, owned=%r,"
10156
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
10157

    
10158
    return result
10159

    
10160
  def _CheckVolumeGroup(self, nodes):
10161
    self.lu.LogInfo("Checking volume groups")
10162

    
10163
    vgname = self.cfg.GetVGName()
10164

    
10165
    # Make sure volume group exists on all involved nodes
10166
    results = self.rpc.call_vg_list(nodes)
10167
    if not results:
10168
      raise errors.OpExecError("Can't list volume groups on the nodes")
10169

    
10170
    for node in nodes:
10171
      res = results[node]
10172
      res.Raise("Error checking node %s" % node)
10173
      if vgname not in res.payload:
10174
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
10175
                                 (vgname, node))
10176

    
10177
  def _CheckDisksExistence(self, nodes):
10178
    # Check disk existence
10179
    for idx, dev in enumerate(self.instance.disks):
10180
      if idx not in self.disks:
10181
        continue
10182

    
10183
      for node in nodes:
10184
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10185
        self.cfg.SetDiskID(dev, node)
10186

    
10187
        result = self.rpc.call_blockdev_find(node, dev)
10188

    
10189
        msg = result.fail_msg
10190
        if msg or not result.payload:
10191
          if not msg:
10192
            msg = "disk not found"
10193
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10194
                                   (idx, node, msg))
10195

    
10196
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10197
    for idx, dev in enumerate(self.instance.disks):
10198
      if idx not in self.disks:
10199
        continue
10200

    
10201
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10202
                      (idx, node_name))
10203

    
10204
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10205
                                   ldisk=ldisk):
10206
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10207
                                 " replace disks for instance %s" %
10208
                                 (node_name, self.instance.name))
10209

    
10210
  def _CreateNewStorage(self, node_name):
10211
    """Create new storage on the primary or secondary node.
10212

10213
    This is only used for same-node replaces, not for changing the
10214
    secondary node, hence we don't want to modify the existing disk.
10215

10216
    """
10217
    iv_names = {}
10218

    
10219
    for idx, dev in enumerate(self.instance.disks):
10220
      if idx not in self.disks:
10221
        continue
10222

    
10223
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10224

    
10225
      self.cfg.SetDiskID(dev, node_name)
10226

    
10227
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10228
      names = _GenerateUniqueNames(self.lu, lv_names)
10229

    
10230
      _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10231

    
10232
      vg_data = dev.children[0].logical_id[0]
10233
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10234
                             logical_id=(vg_data, names[0]), params=data_p)
10235
      vg_meta = dev.children[1].logical_id[0]
10236
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10237
                             logical_id=(vg_meta, names[1]), params=meta_p)
10238

    
10239
      new_lvs = [lv_data, lv_meta]
10240
      old_lvs = [child.Copy() for child in dev.children]
10241
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10242

    
10243
      # we pass force_create=True to force the LVM creation
10244
      for new_lv in new_lvs:
10245
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10246
                        _GetInstanceInfoText(self.instance), False)
10247

    
10248
    return iv_names
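    # The returned mapping is keyed by the DRBD device's iv_name; an
    # illustrative entry (variable names made up):
    #
    #   iv_names["disk/0"] == (drbd_dev,
    #                          [old_data_lv, old_meta_lv],  # current children
    #                          [new_data_lv, new_meta_lv])  # freshly created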
10249

    
10250
  def _CheckDevices(self, node_name, iv_names):
10251
    for name, (dev, _, _) in iv_names.iteritems():
10252
      self.cfg.SetDiskID(dev, node_name)
10253

    
10254
      result = self.rpc.call_blockdev_find(node_name, dev)
10255

    
10256
      msg = result.fail_msg
10257
      if msg or not result.payload:
10258
        if not msg:
10259
          msg = "disk not found"
10260
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10261
                                 (name, msg))
10262

    
10263
      if result.payload.is_degraded:
10264
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10265

    
10266
  def _RemoveOldStorage(self, node_name, iv_names):
10267
    for name, (_, old_lvs, _) in iv_names.iteritems():
10268
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10269

    
10270
      for lv in old_lvs:
10271
        self.cfg.SetDiskID(lv, node_name)
10272

    
10273
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10274
        if msg:
10275
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10276
                             hint="remove unused LVs manually")
10277

    
10278
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10279
    """Replace a disk on the primary or secondary for DRBD 8.
10280

10281
    The algorithm for replace is quite complicated:
10282

10283
      1. for each disk to be replaced:
10284

10285
        1. create new LVs on the target node with unique names
10286
        1. detach old LVs from the drbd device
10287
        1. rename old LVs to name_replaced.<time_t>
10288
        1. rename new LVs to old LVs
10289
        1. attach the new LVs (with the old names now) to the drbd device
10290

10291
      1. wait for sync across all devices
10292

10293
      1. for each modified disk:
10294

10295
        1. remove old LVs (which have the name name_replaced.<time_t>)
10296

10297
    Failures are not very well handled.
10298

10299
    """
10300
    steps_total = 6
10301

    
10302
    # Step: check device activation
10303
    self.lu.LogStep(1, steps_total, "Check device existence")
10304
    self._CheckDisksExistence([self.other_node, self.target_node])
10305
    self._CheckVolumeGroup([self.target_node, self.other_node])
10306

    
10307
    # Step: check other node consistency
10308
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10309
    self._CheckDisksConsistency(self.other_node,
10310
                                self.other_node == self.instance.primary_node,
10311
                                False)
10312

    
10313
    # Step: create new storage
10314
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10315
    iv_names = self._CreateNewStorage(self.target_node)
10316

    
10317
    # Step: for each lv, detach+rename*2+attach
10318
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10319
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10320
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10321

    
10322
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10323
                                                     old_lvs)
10324
      result.Raise("Can't detach drbd from local storage on node"
10325
                   " %s for device %s" % (self.target_node, dev.iv_name))
10326
      #dev.children = []
10327
      #cfg.Update(instance)
10328

    
10329
      # ok, we created the new LVs, so now we know we have the needed
10330
      # storage; as such, we proceed on the target node to rename
10331
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10332
      # using the assumption that logical_id == physical_id (which in
10333
      # turn is the unique_id on that node)
10334

    
10335
      # FIXME(iustin): use a better name for the replaced LVs
10336
      temp_suffix = int(time.time())
10337
      ren_fn = lambda d, suff: (d.physical_id[0],
10338
                                d.physical_id[1] + "_replaced-%s" % suff)
10339

    
10340
      # Build the rename list based on what LVs exist on the node
10341
      rename_old_to_new = []
10342
      for to_ren in old_lvs:
10343
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10344
        if not result.fail_msg and result.payload:
10345
          # device exists
10346
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10347

    
10348
      self.lu.LogInfo("Renaming the old LVs on the target node")
10349
      result = self.rpc.call_blockdev_rename(self.target_node,
10350
                                             rename_old_to_new)
10351
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10352

    
10353
      # Now we rename the new LVs to the old LVs
10354
      self.lu.LogInfo("Renaming the new LVs on the target node")
10355
      rename_new_to_old = [(new, old.physical_id)
10356
                           for old, new in zip(old_lvs, new_lvs)]
10357
      result = self.rpc.call_blockdev_rename(self.target_node,
10358
                                             rename_new_to_old)
10359
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10360

    
10361
      # Intermediate steps of in memory modifications
10362
      for old, new in zip(old_lvs, new_lvs):
10363
        new.logical_id = old.logical_id
10364
        self.cfg.SetDiskID(new, self.target_node)
10365

    
10366
      # We need to modify old_lvs so that removal later removes the
10367
      # right LVs, not the newly added ones; note that old_lvs is a
10368
      # copy here
10369
      for disk in old_lvs:
10370
        disk.logical_id = ren_fn(disk, temp_suffix)
10371
        self.cfg.SetDiskID(disk, self.target_node)
10372

    
10373
      # Now that the new lvs have the old name, we can add them to the device
10374
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10375
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10376
                                                  new_lvs)
10377
      msg = result.fail_msg
10378
      if msg:
10379
        for new_lv in new_lvs:
10380
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10381
                                               new_lv).fail_msg
10382
          if msg2:
10383
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10384
                               hint=("cleanup manually the unused logical"
10385
                                     "volumes"))
10386
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10387

    
10388
    cstep = itertools.count(5)
10389

    
10390
    if self.early_release:
10391
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10392
      self._RemoveOldStorage(self.target_node, iv_names)
10393
      # TODO: Check if releasing locks early still makes sense
10394
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10395
    else:
10396
      # Release all resource locks except those used by the instance
10397
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10398
                    keep=self.node_secondary_ip.keys())
10399

    
10400
    # Release all node locks while waiting for sync
10401
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10402

    
10403
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10404
    # shutdown in the caller into consideration.
10405

    
10406
    # Wait for sync
10407
    # This can fail as the old devices are degraded and _WaitForSync
10408
    # does a combined result over all disks, so we don't check its return value
10409
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10410
    _WaitForSync(self.lu, self.instance)
10411

    
10412
    # Check all devices manually
10413
    self._CheckDevices(self.instance.primary_node, iv_names)
10414

    
10415
    # Step: remove old storage
10416
    if not self.early_release:
10417
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10418
      self._RemoveOldStorage(self.target_node, iv_names)
10419

    
10420
  def _ExecDrbd8Secondary(self, feedback_fn):
10421
    """Replace the secondary node for DRBD 8.
10422

10423
    The algorithm for replace is quite complicated:
10424
      - for all disks of the instance:
10425
        - create new LVs on the new node with same names
10426
        - shutdown the drbd device on the old secondary
10427
        - disconnect the drbd network on the primary
10428
        - create the drbd device on the new secondary
10429
        - network attach the drbd on the primary, using an artifice:
10430
          the drbd code for Attach() will connect to the network if it
10431
          finds a device which is connected to the good local disks but
10432
          not network enabled
10433
      - wait for sync across all devices
10434
      - remove all disks from the old secondary
10435

10436
    Failures are not very well handled.
10437

10438
    """
10439
    steps_total = 6
10440

    
10441
    pnode = self.instance.primary_node
10442

    
10443
    # Step: check device activation
10444
    self.lu.LogStep(1, steps_total, "Check device existence")
10445
    self._CheckDisksExistence([self.instance.primary_node])
10446
    self._CheckVolumeGroup([self.instance.primary_node])
10447

    
10448
    # Step: check other node consistency
10449
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10450
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10451

    
10452
    # Step: create new storage
10453
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10454
    for idx, dev in enumerate(self.instance.disks):
10455
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10456
                      (self.new_node, idx))
10457
      # we pass force_create=True to force LVM creation
10458
      for new_lv in dev.children:
10459
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10460
                        _GetInstanceInfoText(self.instance), False)
10461

    
10462
    # Step 4: dbrd minors and drbd setups changes
10463
    # after this, we must manually remove the drbd minors on both the
10464
    # error and the success paths
10465
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10466
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10467
                                         for dev in self.instance.disks],
10468
                                        self.instance.name)
10469
    logging.debug("Allocated minors %r", minors)
10470

    
10471
    iv_names = {}
10472
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10473
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10474
                      (self.new_node, idx))
10475
      # create new devices on new_node; note that we create two IDs:
10476
      # one without port, so the drbd will be activated without
10477
      # networking information on the new node at this stage, and one
10478
      # with network, for the latter activation in step 4
10479
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10480
      if self.instance.primary_node == o_node1:
10481
        p_minor = o_minor1
10482
      else:
10483
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10484
        p_minor = o_minor2
10485

    
10486
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10487
                      p_minor, new_minor, o_secret)
10488
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10489
                    p_minor, new_minor, o_secret)
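      # Illustrative example (names, port and minors made up): with the
      # primary on node1 using minor 0, minor 3 allocated on the new
      # secondary node4, and DRBD port 11000, the tuples are
      #
      #   new_alone_id == ("node1.example.com", "node4.example.com", None,
      #                    0, 3, o_secret)
      #   new_net_id   == ("node1.example.com", "node4.example.com", 11000,
      #                    0, 3, o_secret)
      #
      # i.e. (node_a, node_b, port, minor_a, minor_b, secret); leaving the
      # port out at first lets the device come up standalone, and the
      # networked ID is only applied when the primary is re-attached later.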
10490

    
10491
      iv_names[idx] = (dev, dev.children, new_net_id)
10492
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10493
                    new_net_id)
10494
      drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10495
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10496
                              logical_id=new_alone_id,
10497
                              children=dev.children,
10498
                              size=dev.size,
10499
                              params=drbd_params)
10500
      try:
10501
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10502
                              _GetInstanceInfoText(self.instance), False)
10503
      except errors.GenericError:
10504
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10505
        raise
10506

    
10507
    # We have new devices, shutdown the drbd on the old secondary
10508
    for idx, dev in enumerate(self.instance.disks):
10509
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10510
      self.cfg.SetDiskID(dev, self.target_node)
10511
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10512
      if msg:
10513
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10514
                           "node: %s" % (idx, msg),
10515
                           hint=("Please cleanup this device manually as"
10516
                                 " soon as possible"))
10517

    
10518
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10519
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10520
                                               self.instance.disks)[pnode]
10521

    
10522
    msg = result.fail_msg
10523
    if msg:
10524
      # detaches didn't succeed (unlikely)
10525
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10526
      raise errors.OpExecError("Can't detach the disks from the network on"
10527
                               " old node: %s" % (msg,))
10528

    
10529
    # if we managed to detach at least one, we update all the disks of
10530
    # the instance to point to the new secondary
10531
    self.lu.LogInfo("Updating instance configuration")
10532
    for dev, _, new_logical_id in iv_names.itervalues():
10533
      dev.logical_id = new_logical_id
10534
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10535

    
10536
    self.cfg.Update(self.instance, feedback_fn)
10537

    
10538
    # Release all node locks (the configuration has been updated)
10539
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10540

    
10541
    # and now perform the drbd attach
10542
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10543
                    " (standalone => connected)")
10544
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10545
                                            self.new_node],
10546
                                           self.node_secondary_ip,
10547
                                           self.instance.disks,
10548
                                           self.instance.name,
10549
                                           False)
10550
    for to_node, to_result in result.items():
10551
      msg = to_result.fail_msg
10552
      if msg:
10553
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10554
                           to_node, msg,
10555
                           hint=("please do a gnt-instance info to see the"
10556
                                 " status of disks"))
10557

    
10558
    cstep = itertools.count(5)
10559

    
10560
    if self.early_release:
10561
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10562
      self._RemoveOldStorage(self.target_node, iv_names)
10563
      # TODO: Check if releasing locks early still makes sense
10564
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10565
    else:
10566
      # Release all resource locks except those used by the instance
10567
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10568
                    keep=self.node_secondary_ip.keys())
10569

    
10570
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10571
    # shutdown in the caller into consideration.
10572

    
10573
    # Wait for sync
10574
    # This can fail as the old devices are degraded and _WaitForSync
10575
    # does a combined result over all disks, so we don't check its return value
10576
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10577
    _WaitForSync(self.lu, self.instance)
10578

    
10579
    # Check all devices manually
10580
    self._CheckDevices(self.instance.primary_node, iv_names)
10581

    
10582
    # Step: remove old storage
10583
    if not self.early_release:
10584
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10585
      self._RemoveOldStorage(self.target_node, iv_names)
10586

    
10587

    
10588
class LURepairNodeStorage(NoHooksLU):
10589
  """Repairs the volume group on a node.
10590

10591
  """
10592
  REQ_BGL = False
10593

    
10594
  def CheckArguments(self):
10595
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10596

    
10597
    storage_type = self.op.storage_type
10598

    
10599
    if (constants.SO_FIX_CONSISTENCY not in
10600
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10601
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10602
                                 " repaired" % storage_type,
10603
                                 errors.ECODE_INVAL)
10604

    
10605
  def ExpandNames(self):
10606
    self.needed_locks = {
10607
      locking.LEVEL_NODE: [self.op.node_name],
10608
      }
10609

    
10610
  def _CheckFaultyDisks(self, instance, node_name):
10611
    """Ensure faulty disks abort the opcode or at least warn."""
10612
    try:
10613
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10614
                                  node_name, True):
10615
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10616
                                   " node '%s'" % (instance.name, node_name),
10617
                                   errors.ECODE_STATE)
10618
    except errors.OpPrereqError, err:
10619
      if self.op.ignore_consistency:
10620
        self.proc.LogWarning(str(err.args[0]))
10621
      else:
10622
        raise
10623

    
10624
  def CheckPrereq(self):
10625
    """Check prerequisites.
10626

10627
    """
10628
    # Check whether any instance on this node has faulty disks
10629
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10630
      if inst.admin_state != constants.ADMINST_UP:
10631
        continue
10632
      check_nodes = set(inst.all_nodes)
10633
      check_nodes.discard(self.op.node_name)
10634
      for inst_node_name in check_nodes:
10635
        self._CheckFaultyDisks(inst, inst_node_name)
10636

    
10637
  def Exec(self, feedback_fn):
10638
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10639
                (self.op.name, self.op.node_name))
10640

    
10641
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10642
    result = self.rpc.call_storage_execute(self.op.node_name,
10643
                                           self.op.storage_type, st_args,
10644
                                           self.op.name,
10645
                                           constants.SO_FIX_CONSISTENCY)
10646
    result.Raise("Failed to repair storage unit '%s' on %s" %
10647
                 (self.op.name, self.op.node_name))
10648

    
10649

    
10650
class LUNodeEvacuate(NoHooksLU):
10651
  """Evacuates instances off a list of nodes.
10652

10653
  """
10654
  REQ_BGL = False
10655

    
10656
  _MODE2IALLOCATOR = {
10657
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10658
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10659
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10660
    }
10661
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10662
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
10663
          constants.IALLOCATOR_NEVAC_MODES)
10664

    
10665
  def CheckArguments(self):
10666
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10667

    
10668
  def ExpandNames(self):
10669
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10670

    
10671
    if self.op.remote_node is not None:
10672
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10673
      assert self.op.remote_node
10674

    
10675
      if self.op.remote_node == self.op.node_name:
10676
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10677
                                   " secondary node", errors.ECODE_INVAL)
10678

    
10679
      if self.op.mode != constants.NODE_EVAC_SEC:
10680
        raise errors.OpPrereqError("Without the use of an iallocator only"
10681
                                   " secondary instances can be evacuated",
10682
                                   errors.ECODE_INVAL)
10683

    
10684
    # Declare locks
10685
    self.share_locks = _ShareAll()
10686
    self.needed_locks = {
10687
      locking.LEVEL_INSTANCE: [],
10688
      locking.LEVEL_NODEGROUP: [],
10689
      locking.LEVEL_NODE: [],
10690
      }
10691

    
10692
    # Determine nodes (via group) optimistically, needs verification once locks
10693
    # have been acquired
10694
    self.lock_nodes = self._DetermineNodes()
10695

    
10696
  def _DetermineNodes(self):
10697
    """Gets the list of nodes to operate on.
10698

10699
    """
10700
    if self.op.remote_node is None:
10701
      # Iallocator will choose any node(s) in the same group
10702
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10703
    else:
10704
      group_nodes = frozenset([self.op.remote_node])
10705

    
10706
    # Determine nodes to be locked
10707
    return set([self.op.node_name]) | group_nodes
10708

    
10709
  def _DetermineInstances(self):
10710
    """Builds list of instances to operate on.
10711

10712
    """
10713
    assert self.op.mode in constants.NODE_EVAC_MODES
10714

    
10715
    if self.op.mode == constants.NODE_EVAC_PRI:
10716
      # Primary instances only
10717
      inst_fn = _GetNodePrimaryInstances
10718
      assert self.op.remote_node is None, \
10719
        "Evacuating primary instances requires iallocator"
10720
    elif self.op.mode == constants.NODE_EVAC_SEC:
10721
      # Secondary instances only
10722
      inst_fn = _GetNodeSecondaryInstances
10723
    else:
10724
      # All instances
10725
      assert self.op.mode == constants.NODE_EVAC_ALL
10726
      inst_fn = _GetNodeInstances
10727
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10728
      # per instance
10729
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10730
                                 " interface it is not possible to evacuate"
10731
                                 " all instances at once; specify explicitly"
10732
                                 " whether to evacuate primary or secondary"
10733
                                 " instances",
10734
                                 errors.ECODE_INVAL)
10735

    
10736
    return inst_fn(self.cfg, self.op.node_name)
10737

    
10738
  def DeclareLocks(self, level):
10739
    if level == locking.LEVEL_INSTANCE:
10740
      # Lock instances optimistically, needs verification once node and group
10741
      # locks have been acquired
10742
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10743
        set(i.name for i in self._DetermineInstances())
10744

    
10745
    elif level == locking.LEVEL_NODEGROUP:
10746
      # Lock node groups for all potential target nodes optimistically, needs
10747
      # verification once nodes have been acquired
10748
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10749
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10750

    
10751
    elif level == locking.LEVEL_NODE:
10752
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10753

    
10754
  def CheckPrereq(self):
10755
    # Verify locks
10756
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10757
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10758
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10759

    
10760
    need_nodes = self._DetermineNodes()
10761

    
10762
    if not owned_nodes.issuperset(need_nodes):
10763
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
10764
                                 " locks were acquired, current nodes are"
10765
                                 " are '%s', used to be '%s'; retry the"
10766
                                 " operation" %
10767
                                 (self.op.node_name,
10768
                                  utils.CommaJoin(need_nodes),
10769
                                  utils.CommaJoin(owned_nodes)),
10770
                                 errors.ECODE_STATE)
10771

    
10772
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10773
    if owned_groups != wanted_groups:
10774
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10775
                               " current groups are '%s', used to be '%s';"
10776
                               " retry the operation" %
10777
                               (utils.CommaJoin(wanted_groups),
10778
                                utils.CommaJoin(owned_groups)))
10779

    
10780
    # Determine affected instances
10781
    self.instances = self._DetermineInstances()
10782
    self.instance_names = [i.name for i in self.instances]
10783

    
10784
    if set(self.instance_names) != owned_instances:
10785
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10786
                               " were acquired, current instances are '%s',"
10787
                               " used to be '%s'; retry the operation" %
10788
                               (self.op.node_name,
10789
                                utils.CommaJoin(self.instance_names),
10790
                                utils.CommaJoin(owned_instances)))
10791

    
10792
    if self.instance_names:
10793
      self.LogInfo("Evacuating instances from node '%s': %s",
10794
                   self.op.node_name,
10795
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10796
    else:
10797
      self.LogInfo("No instances to evacuate from node '%s'",
10798
                   self.op.node_name)
10799

    
10800
    if self.op.remote_node is not None:
10801
      for i in self.instances:
10802
        if i.primary_node == self.op.remote_node:
10803
          raise errors.OpPrereqError("Node %s is the primary node of"
10804
                                     " instance %s, cannot use it as"
10805
                                     " secondary" %
10806
                                     (self.op.remote_node, i.name),
10807
                                     errors.ECODE_INVAL)
10808

    
10809
  def Exec(self, feedback_fn):
10810
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10811

    
10812
    if not self.instance_names:
10813
      # No instances to evacuate
10814
      jobs = []
10815

    
10816
    elif self.op.iallocator is not None:
10817
      # TODO: Implement relocation to other group
10818
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10819
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10820
                       instances=list(self.instance_names))
10821

    
10822
      ial.Run(self.op.iallocator)
10823

    
10824
      if not ial.success:
10825
        raise errors.OpPrereqError("Can't compute node evacuation using"
10826
                                   " iallocator '%s': %s" %
10827
                                   (self.op.iallocator, ial.info),
10828
                                   errors.ECODE_NORES)
10829

    
10830
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10831

    
10832
    elif self.op.remote_node is not None:
10833
      assert self.op.mode == constants.NODE_EVAC_SEC
10834
      jobs = [
10835
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10836
                                        remote_node=self.op.remote_node,
10837
                                        disks=[],
10838
                                        mode=constants.REPLACE_DISK_CHG,
10839
                                        early_release=self.op.early_release)]
10840
        for instance_name in self.instance_names
10841
        ]
10842

    
10843
    else:
10844
      raise errors.ProgrammerError("No iallocator or remote node")
10845

    
10846
    return ResultWithJobs(jobs)
10847

    
10848

    
10849
def _SetOpEarlyRelease(early_release, op):
10850
  """Sets C{early_release} flag on opcodes if available.
10851

10852
  """
10853
  try:
10854
    op.early_release = early_release
10855
  except AttributeError:
10856
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10857

    
10858
  return op
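

# Illustration only: a minimal sketch (not used by any LU) of how the helper
# above is combined with a replace-disks opcode, mirroring what the node
# evacuation code does.  The function name, instance name and node name are
# hypothetical placeholders.
def _ExampleSetOpEarlyRelease():
  """Example only: builds a replace-disks opcode with early_release set.

  """
  op = opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
                                      remote_node="node2.example.com",
                                      disks=[],
                                      mode=constants.REPLACE_DISK_CHG)
  return _SetOpEarlyRelease(True, op)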
10859

    
10860

    
10861
def _NodeEvacDest(use_nodes, group, nodes):
10862
  """Returns group or nodes depending on caller's choice.
10863

10864
  """
10865
  if use_nodes:
10866
    return utils.CommaJoin(nodes)
10867
  else:
10868
    return group
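
# For illustration (hypothetical values):
#   _NodeEvacDest(True, "group1", ["node2", "node3"])  returns "node2, node3"
#   _NodeEvacDest(False, "group1", ["node2", "node3"]) returns "group1"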
10869

    
10870

    
10871
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10872
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10873

10874
  Used for iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10875
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10876

10877
  @type lu: L{LogicalUnit}
10878
  @param lu: Logical unit instance
10879
  @type alloc_result: tuple/list
10880
  @param alloc_result: Result from iallocator
10881
  @type early_release: bool
10882
  @param early_release: Whether to release locks early if possible
10883
  @type use_nodes: bool
10884
  @param use_nodes: Whether to display node names instead of groups
10885

10886
  """
10887
  (moved, failed, jobs) = alloc_result
10888

    
10889
  if failed:
10890
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10891
                                 for (name, reason) in failed)
10892
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10893
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10894

    
10895
  if moved:
10896
    lu.LogInfo("Instances to be moved: %s",
10897
               utils.CommaJoin("%s (to %s)" %
10898
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10899
                               for (name, group, nodes) in moved))
10900

    
10901
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10902
              map(opcodes.OpCode.LoadOpCode, ops))
10903
          for ops in jobs]
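
# For reference, alloc_result as unpacked above is a three-element sequence;
# a minimal hypothetical example:
#   ([("inst1.example.com", "group1", ["node2.example.com"])],  # moved
#    [],                                                        # failed
#    [[<serialized opcode>, ...], ...])                         # jobs
# Each inner list in "jobs" becomes one job; its serialized opcodes are
# revived with opcodes.OpCode.LoadOpCode and get the early_release flag set
# where the opcode supports it.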
10904

    
10905

    
10906
class LUInstanceGrowDisk(LogicalUnit):
10907
  """Grow a disk of an instance.
10908

10909
  """
10910
  HPATH = "disk-grow"
10911
  HTYPE = constants.HTYPE_INSTANCE
10912
  REQ_BGL = False
10913

    
10914
  def ExpandNames(self):
10915
    self._ExpandAndLockInstance()
10916
    self.needed_locks[locking.LEVEL_NODE] = []
10917
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10918
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10919

    
10920
  def DeclareLocks(self, level):
10921
    if level == locking.LEVEL_NODE:
10922
      self._LockInstancesNodes()
10923
    elif level == locking.LEVEL_NODE_RES:
10924
      # Copy node locks
10925
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10926
        self.needed_locks[locking.LEVEL_NODE][:]
10927

    
10928
  def BuildHooksEnv(self):
10929
    """Build hooks env.
10930

10931
    This runs on the master, the primary and all the secondaries.
10932

10933
    """
10934
    env = {
10935
      "DISK": self.op.disk,
10936
      "AMOUNT": self.op.amount,
10937
      }
10938
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10939
    return env
10940

    
10941
  def BuildHooksNodes(self):
10942
    """Build hooks nodes.
10943

10944
    """
10945
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10946
    return (nl, nl)
10947

    
10948
  def CheckPrereq(self):
10949
    """Check prerequisites.
10950

10951
    This checks that the instance is in the cluster.
10952

10953
    """
10954
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10955
    assert instance is not None, \
10956
      "Cannot retrieve locked instance %s" % self.op.instance_name
10957
    nodenames = list(instance.all_nodes)
10958
    for node in nodenames:
10959
      _CheckNodeOnline(self, node)
10960

    
10961
    self.instance = instance
10962

    
10963
    if instance.disk_template not in constants.DTS_GROWABLE:
10964
      raise errors.OpPrereqError("Instance's disk layout does not support"
10965
                                 " growing", errors.ECODE_INVAL)
10966

    
10967
    self.disk = instance.FindDisk(self.op.disk)
10968

    
10969
    if instance.disk_template not in (constants.DT_FILE,
10970
                                      constants.DT_SHARED_FILE):
10971
      # TODO: check the free disk space for file, once that feature is
10972
      # supported
10973
      _CheckNodesFreeDiskPerVG(self, nodenames,
10974
                               self.disk.ComputeGrowth(self.op.amount))
10975

    
10976
  def Exec(self, feedback_fn):
10977
    """Execute disk grow.
10978

10979
    """
10980
    instance = self.instance
10981
    disk = self.disk
10982

    
10983
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10984
    assert (self.owned_locks(locking.LEVEL_NODE) ==
10985
            self.owned_locks(locking.LEVEL_NODE_RES))
10986

    
10987
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10988
    if not disks_ok:
10989
      raise errors.OpExecError("Cannot activate block device to grow")
10990

    
10991
    feedback_fn("Growing disk %s of instance '%s' by %s" %
10992
                (self.op.disk, instance.name,
10993
                 utils.FormatUnit(self.op.amount, "h")))
10994

    
10995
    # First run all grow ops in dry-run mode
10996
    for node in instance.all_nodes:
10997
      self.cfg.SetDiskID(disk, node)
10998
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10999
      result.Raise("Grow request failed to node %s" % node)
11000

    
11001
    # We know that (as far as we can test) operations across different
11002
    # nodes will succeed, time to run it for real
11003
    for node in instance.all_nodes:
11004
      self.cfg.SetDiskID(disk, node)
11005
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
11006
      result.Raise("Grow request failed to node %s" % node)
11007

    
11008
      # TODO: Rewrite code to work properly
11009
      # DRBD goes into sync mode for a short amount of time after executing the
11010
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
11011
      # calling "resize" in sync mode fails. Sleeping for a short amount of
11012
      # time is a work-around.
11013
      time.sleep(5)
11014

    
11015
    disk.RecordGrow(self.op.amount)
11016
    self.cfg.Update(instance, feedback_fn)
11017

    
11018
    # Changes have been recorded, release node lock
11019
    _ReleaseLocks(self, locking.LEVEL_NODE)
11020

    
11021
    # Downgrade lock while waiting for sync
11022
    self.glm.downgrade(locking.LEVEL_INSTANCE)
11023

    
11024
    if self.op.wait_for_sync:
11025
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
11026
      if disk_abort:
11027
        self.proc.LogWarning("Disk sync-ing has not returned a good"
11028
                             " status; please check the instance")
11029
      if instance.admin_state != constants.ADMINST_UP:
11030
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
11031
    elif instance.admin_state != constants.ADMINST_UP:
11032
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
11033
                           " not supposed to be running because no wait for"
11034
                           " sync mode was requested")
11035

    
11036
    assert self.owned_locks(locking.LEVEL_NODE_RES)
11037
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
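
# Client-side sketch (hypothetical values) of the opcode consumed by the LU
# above: grow disk 0 of an instance by 1024 MiB and wait for the resync.
# The keyword names mirror the self.op attributes read in this LU.
#   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
#                                   disk=0, amount=1024, wait_for_sync=True)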
11038

    
11039

    
11040
class LUInstanceQueryData(NoHooksLU):
11041
  """Query runtime instance data.
11042

11043
  """
11044
  REQ_BGL = False
11045

    
11046
  def ExpandNames(self):
11047
    self.needed_locks = {}
11048

    
11049
    # Use locking if requested or when non-static information is wanted
11050
    if not (self.op.static or self.op.use_locking):
11051
      self.LogWarning("Non-static data requested, locks need to be acquired")
11052
      self.op.use_locking = True
11053

    
11054
    if self.op.instances or not self.op.use_locking:
11055
      # Expand instance names right here
11056
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
11057
    else:
11058
      # Will use acquired locks
11059
      self.wanted_names = None
11060

    
11061
    if self.op.use_locking:
11062
      self.share_locks = _ShareAll()
11063

    
11064
      if self.wanted_names is None:
11065
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
11066
      else:
11067
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
11068

    
11069
      self.needed_locks[locking.LEVEL_NODE] = []
11070
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11071

    
11072
  def DeclareLocks(self, level):
11073
    if self.op.use_locking and level == locking.LEVEL_NODE:
11074
      self._LockInstancesNodes()
11075

    
11076
  def CheckPrereq(self):
11077
    """Check prerequisites.
11078

11079
    This only checks the optional instance list against the existing names.
11080

11081
    """
11082
    if self.wanted_names is None:
11083
      assert self.op.use_locking, "Locking was not used"
11084
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
11085

    
11086
    self.wanted_instances = \
11087
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
11088

    
11089
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
11090
    """Returns the status of a block device
11091

11092
    """
11093
    if self.op.static or not node:
11094
      return None
11095

    
11096
    self.cfg.SetDiskID(dev, node)
11097

    
11098
    result = self.rpc.call_blockdev_find(node, dev)
11099
    if result.offline:
11100
      return None
11101

    
11102
    result.Raise("Can't compute disk status for %s" % instance_name)
11103

    
11104
    status = result.payload
11105
    if status is None:
11106
      return None
11107

    
11108
    return (status.dev_path, status.major, status.minor,
11109
            status.sync_percent, status.estimated_time,
11110
            status.is_degraded, status.ldisk_status)
11111

    
11112
  def _ComputeDiskStatus(self, instance, snode, dev):
11113
    """Compute block device status.
11114

11115
    """
11116
    if dev.dev_type in constants.LDS_DRBD:
11117
      # for DRBD devices the snode comes from the logical_id, not the caller
11118
      if dev.logical_id[0] == instance.primary_node:
11119
        snode = dev.logical_id[1]
11120
      else:
11121
        snode = dev.logical_id[0]
11122

    
11123
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
11124
                                              instance.name, dev)
11125
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
11126

    
11127
    if dev.children:
11128
      dev_children = map(compat.partial(self._ComputeDiskStatus,
11129
                                        instance, snode),
11130
                         dev.children)
11131
    else:
11132
      dev_children = []
11133

    
11134
    return {
11135
      "iv_name": dev.iv_name,
11136
      "dev_type": dev.dev_type,
11137
      "logical_id": dev.logical_id,
11138
      "physical_id": dev.physical_id,
11139
      "pstatus": dev_pstatus,
11140
      "sstatus": dev_sstatus,
11141
      "children": dev_children,
11142
      "mode": dev.mode,
11143
      "size": dev.size,
11144
      }
11145

    
11146
  def Exec(self, feedback_fn):
11147
    """Gather and return data"""
11148
    result = {}
11149

    
11150
    cluster = self.cfg.GetClusterInfo()
11151

    
11152
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
11153
                                          for i in self.wanted_instances)
11154
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
11155
      if self.op.static or pnode.offline:
11156
        remote_state = None
11157
        if pnode.offline:
11158
          self.LogWarning("Primary node %s is marked offline, returning static"
11159
                          " information only for instance %s" %
11160
                          (pnode.name, instance.name))
11161
      else:
11162
        remote_info = self.rpc.call_instance_info(instance.primary_node,
11163
                                                  instance.name,
11164
                                                  instance.hypervisor)
11165
        remote_info.Raise("Error checking node %s" % instance.primary_node)
11166
        remote_info = remote_info.payload
11167
        if remote_info and "state" in remote_info:
11168
          remote_state = "up"
11169
        else:
11170
          if instance.admin_state == constants.ADMINST_UP:
11171
            remote_state = "down"
11172
          else:
11173
            remote_state = instance.admin_state
11174

    
11175
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
11176
                  instance.disks)
11177

    
11178
      result[instance.name] = {
11179
        "name": instance.name,
11180
        "config_state": instance.admin_state,
11181
        "run_state": remote_state,
11182
        "pnode": instance.primary_node,
11183
        "snodes": instance.secondary_nodes,
11184
        "os": instance.os,
11185
        # this happens to be the same format used for hooks
11186
        "nics": _NICListToTuple(self, instance.nics),
11187
        "disk_template": instance.disk_template,
11188
        "disks": disks,
11189
        "hypervisor": instance.hypervisor,
11190
        "network_port": instance.network_port,
11191
        "hv_instance": instance.hvparams,
11192
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
11193
        "be_instance": instance.beparams,
11194
        "be_actual": cluster.FillBE(instance),
11195
        "os_instance": instance.osparams,
11196
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
11197
        "serial_no": instance.serial_no,
11198
        "mtime": instance.mtime,
11199
        "ctime": instance.ctime,
11200
        "uuid": instance.uuid,
11201
        }
11202

    
11203
    return result
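
# Shape of the value returned above (hypothetical excerpt): a dict keyed by
# instance name, e.g.
#   {"inst1.example.com": {"name": "inst1.example.com",
#                          "config_state": ...,  # admin state from config
#                          "run_state": "up",    # None if static or the
#                                                # primary node is offline
#                          "disks": [...], "nics": [...], ...}}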
11204

    
11205

    
11206
class LUInstanceSetParams(LogicalUnit):
11207
  """Modifies an instances's parameters.
11208

11209
  """
11210
  HPATH = "instance-modify"
11211
  HTYPE = constants.HTYPE_INSTANCE
11212
  REQ_BGL = False
11213

    
11214
  def CheckArguments(self):
11215
    if not (self.op.nics or self.op.disks or self.op.disk_template or
11216
            self.op.hvparams or self.op.beparams or self.op.os_name or
11217
            self.op.online_inst or self.op.offline_inst):
11218
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11219

    
11220
    if self.op.hvparams:
11221
      _CheckGlobalHvParams(self.op.hvparams)
11222

    
11223
    # Disk validation
11224
    disk_addremove = 0
11225
    for disk_op, disk_dict in self.op.disks:
11226
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11227
      if disk_op == constants.DDM_REMOVE:
11228
        disk_addremove += 1
11229
        continue
11230
      elif disk_op == constants.DDM_ADD:
11231
        disk_addremove += 1
11232
      else:
11233
        if not isinstance(disk_op, int):
11234
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11235
        if not isinstance(disk_dict, dict):
11236
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11237
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11238

    
11239
      if disk_op == constants.DDM_ADD:
11240
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11241
        if mode not in constants.DISK_ACCESS_SET:
11242
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11243
                                     errors.ECODE_INVAL)
11244
        size = disk_dict.get(constants.IDISK_SIZE, None)
11245
        if size is None:
11246
          raise errors.OpPrereqError("Required disk parameter size missing",
11247
                                     errors.ECODE_INVAL)
11248
        try:
11249
          size = int(size)
11250
        except (TypeError, ValueError), err:
11251
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11252
                                     str(err), errors.ECODE_INVAL)
11253
        disk_dict[constants.IDISK_SIZE] = size
11254
      else:
11255
        # modification of disk
11256
        if constants.IDISK_SIZE in disk_dict:
11257
          raise errors.OpPrereqError("Disk size change not possible, use"
11258
                                     " grow-disk", errors.ECODE_INVAL)
11259

    
11260
    if disk_addremove > 1:
11261
      raise errors.OpPrereqError("Only one disk add or remove operation"
11262
                                 " supported at a time", errors.ECODE_INVAL)
11263

    
11264
    if self.op.disks and self.op.disk_template is not None:
11265
      raise errors.OpPrereqError("Disk template conversion and other disk"
11266
                                 " changes not supported at the same time",
11267
                                 errors.ECODE_INVAL)
11268

    
11269
    if (self.op.disk_template and
11270
        self.op.disk_template in constants.DTS_INT_MIRROR and
11271
        self.op.remote_node is None):
11272
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
11273
                                 " one requires specifying a secondary node",
11274
                                 errors.ECODE_INVAL)
11275

    
11276
    # NIC validation
11277
    nic_addremove = 0
11278
    for nic_op, nic_dict in self.op.nics:
11279
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11280
      if nic_op == constants.DDM_REMOVE:
11281
        nic_addremove += 1
11282
        continue
11283
      elif nic_op == constants.DDM_ADD:
11284
        nic_addremove += 1
11285
      else:
11286
        if not isinstance(nic_op, int):
11287
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11288
        if not isinstance(nic_dict, dict):
11289
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11290
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11291

    
11292
      # nic_dict should be a dict
11293
      nic_ip = nic_dict.get(constants.INIC_IP, None)
11294
      if nic_ip is not None:
11295
        if nic_ip.lower() == constants.VALUE_NONE:
11296
          nic_dict[constants.INIC_IP] = None
11297
        else:
11298
          if not netutils.IPAddress.IsValid(nic_ip):
11299
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11300
                                       errors.ECODE_INVAL)
11301

    
11302
      nic_bridge = nic_dict.get("bridge", None)
11303
      nic_link = nic_dict.get(constants.INIC_LINK, None)
11304
      if nic_bridge and nic_link:
11305
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11306
                                   " at the same time", errors.ECODE_INVAL)
11307
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11308
        nic_dict["bridge"] = None
11309
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11310
        nic_dict[constants.INIC_LINK] = None
11311

    
11312
      if nic_op == constants.DDM_ADD:
11313
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
11314
        if nic_mac is None:
11315
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11316

    
11317
      if constants.INIC_MAC in nic_dict:
11318
        nic_mac = nic_dict[constants.INIC_MAC]
11319
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11320
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11321

    
11322
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11323
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11324
                                     " modifying an existing nic",
11325
                                     errors.ECODE_INVAL)
11326

    
11327
    if nic_addremove > 1:
11328
      raise errors.OpPrereqError("Only one NIC add or remove operation"
11329
                                 " supported at a time", errors.ECODE_INVAL)
11330

    
11331
  def ExpandNames(self):
11332
    self._ExpandAndLockInstance()
11333
    # Can't even acquire node locks in shared mode as upcoming changes in
11334
    # Ganeti 2.6 will start to modify the node object on disk conversion
11335
    self.needed_locks[locking.LEVEL_NODE] = []
11336
    self.needed_locks[locking.LEVEL_NODE_RES] = []
11337
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11338

    
11339
  def DeclareLocks(self, level):
11340
    if level == locking.LEVEL_NODE:
11341
      self._LockInstancesNodes()
11342
      if self.op.disk_template and self.op.remote_node:
11343
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11344
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11345
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11346
      # Copy node locks
11347
      self.needed_locks[locking.LEVEL_NODE_RES] = \
11348
        self.needed_locks[locking.LEVEL_NODE][:]
11349

    
11350
  def BuildHooksEnv(self):
11351
    """Build hooks env.
11352

11353
    This runs on the master, primary and secondaries.
11354

11355
    """
11356
    args = dict()
11357
    if constants.BE_MINMEM in self.be_new:
11358
      args["minmem"] = self.be_new[constants.BE_MINMEM]
11359
    if constants.BE_MAXMEM in self.be_new:
11360
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11361
    if constants.BE_VCPUS in self.be_new:
11362
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11363
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11364
    # information at all.
11365
    if self.op.nics:
11366
      args["nics"] = []
11367
      nic_override = dict(self.op.nics)
11368
      for idx, nic in enumerate(self.instance.nics):
11369
        if idx in nic_override:
11370
          this_nic_override = nic_override[idx]
11371
        else:
11372
          this_nic_override = {}
11373
        if constants.INIC_IP in this_nic_override:
11374
          ip = this_nic_override[constants.INIC_IP]
11375
        else:
11376
          ip = nic.ip
11377
        if constants.INIC_MAC in this_nic_override:
11378
          mac = this_nic_override[constants.INIC_MAC]
11379
        else:
11380
          mac = nic.mac
11381
        if idx in self.nic_pnew:
11382
          nicparams = self.nic_pnew[idx]
11383
        else:
11384
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11385
        mode = nicparams[constants.NIC_MODE]
11386
        link = nicparams[constants.NIC_LINK]
11387
        args["nics"].append((ip, mac, mode, link))
11388
      if constants.DDM_ADD in nic_override:
11389
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11390
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11391
        nicparams = self.nic_pnew[constants.DDM_ADD]
11392
        mode = nicparams[constants.NIC_MODE]
11393
        link = nicparams[constants.NIC_LINK]
11394
        args["nics"].append((ip, mac, mode, link))
11395
      elif constants.DDM_REMOVE in nic_override:
11396
        del args["nics"][-1]
11397

    
11398
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11399
    if self.op.disk_template:
11400
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11401

    
11402
    return env
11403

    
11404
  def BuildHooksNodes(self):
11405
    """Build hooks nodes.
11406

11407
    """
11408
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11409
    return (nl, nl)
11410

    
11411
  def CheckPrereq(self):
11412
    """Check prerequisites.
11413

11414
    This only checks the instance list against the existing names.
11415

11416
    """
11417
    # checking the new params on the primary/secondary nodes
11418

    
11419
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11420
    cluster = self.cluster = self.cfg.GetClusterInfo()
11421
    assert self.instance is not None, \
11422
      "Cannot retrieve locked instance %s" % self.op.instance_name
11423
    pnode = instance.primary_node
11424
    nodelist = list(instance.all_nodes)
11425
    pnode_info = self.cfg.GetNodeInfo(pnode)
11426
    self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams
11427

    
11428
    # OS change
11429
    if self.op.os_name and not self.op.force:
11430
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11431
                      self.op.force_variant)
11432
      instance_os = self.op.os_name
11433
    else:
11434
      instance_os = instance.os
11435

    
11436
    if self.op.disk_template:
11437
      if instance.disk_template == self.op.disk_template:
11438
        raise errors.OpPrereqError("Instance already has disk template %s" %
11439
                                   instance.disk_template, errors.ECODE_INVAL)
11440

    
11441
      if (instance.disk_template,
11442
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11443
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11444
                                   " %s to %s" % (instance.disk_template,
11445
                                                  self.op.disk_template),
11446
                                   errors.ECODE_INVAL)
11447
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11448
                          msg="cannot change disk template")
11449
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11450
        if self.op.remote_node == pnode:
11451
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11452
                                     " as the primary node of the instance" %
11453
                                     self.op.remote_node, errors.ECODE_STATE)
11454
        _CheckNodeOnline(self, self.op.remote_node)
11455
        _CheckNodeNotDrained(self, self.op.remote_node)
11456
        # FIXME: here we assume that the old instance type is DT_PLAIN
11457
        assert instance.disk_template == constants.DT_PLAIN
11458
        disks = [{constants.IDISK_SIZE: d.size,
11459
                  constants.IDISK_VG: d.logical_id[0]}
11460
                 for d in instance.disks]
11461
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11462
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11463

    
11464
        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
11465
        if pnode_info.group != snode_info.group:
11466
          self.LogWarning("The primary and secondary nodes are in two"
11467
                          " different node groups; the disk parameters"
11468
                          " from the first disk's node group will be"
11469
                          " used")
11470

    
11471
    # hvparams processing
11472
    if self.op.hvparams:
11473
      hv_type = instance.hypervisor
11474
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11475
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11476
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11477

    
11478
      # local check
11479
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11480
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11481
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11482
      self.hv_inst = i_hvdict # the new dict (without defaults)
11483
    else:
11484
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11485
                                              instance.hvparams)
11486
      self.hv_new = self.hv_inst = {}
11487

    
11488
    # beparams processing
11489
    if self.op.beparams:
11490
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11491
                                   use_none=True)
11492
      objects.UpgradeBeParams(i_bedict)
11493
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11494
      be_new = cluster.SimpleFillBE(i_bedict)
11495
      self.be_proposed = self.be_new = be_new # the new actual values
11496
      self.be_inst = i_bedict # the new dict (without defaults)
11497
    else:
11498
      self.be_new = self.be_inst = {}
11499
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11500
    be_old = cluster.FillBE(instance)
11501

    
11502
    # CPU param validation -- checking every time a parameter is
11503
    # changed to cover all cases where either CPU mask or vcpus have
11504
    # changed
11505
    if (constants.BE_VCPUS in self.be_proposed and
11506
        constants.HV_CPU_MASK in self.hv_proposed):
11507
      cpu_list = \
11508
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11509
      # Verify mask is consistent with number of vCPUs. Can skip this
11510
      # test if only 1 entry in the CPU mask, which means same mask
11511
      # is applied to all vCPUs.
11512
      if (len(cpu_list) > 1 and
11513
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11514
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11515
                                   " CPU mask [%s]" %
11516
                                   (self.be_proposed[constants.BE_VCPUS],
11517
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11518
                                   errors.ECODE_INVAL)
11519

    
11520
      # Only perform this test if a new CPU mask is given
11521
      if constants.HV_CPU_MASK in self.hv_new:
11522
        # Calculate the largest CPU number requested
11523
        max_requested_cpu = max(map(max, cpu_list))
11524
        # Check that all of the instance's nodes have enough physical CPUs to
11525
        # satisfy the requested CPU mask
11526
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11527
                                max_requested_cpu + 1, instance.hypervisor)
11528

    
11529
    # osparams processing
11530
    if self.op.osparams:
11531
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11532
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11533
      self.os_inst = i_osdict # the new dict (without defaults)
11534
    else:
11535
      self.os_inst = {}
11536

    
11537
    self.warn = []
11538

    
11539
    #TODO(dynmem): do the appropriate check involving MINMEM
11540
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
11541
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
11542
      mem_check_list = [pnode]
11543
      if be_new[constants.BE_AUTO_BALANCE]:
11544
        # either we changed auto_balance to yes or it was from before
11545
        mem_check_list.extend(instance.secondary_nodes)
11546
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11547
                                                  instance.hypervisor)
11548
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11549
                                         [instance.hypervisor])
11550
      pninfo = nodeinfo[pnode]
11551
      msg = pninfo.fail_msg
11552
      if msg:
11553
        # Assume the primary node is unreachable and go ahead
11554
        self.warn.append("Can't get info from primary node %s: %s" %
11555
                         (pnode, msg))
11556
      else:
11557
        (_, _, (pnhvinfo, )) = pninfo.payload
11558
        if not isinstance(pnhvinfo.get("memory_free", None), int):
11559
          self.warn.append("Node data from primary node %s doesn't contain"
11560
                           " free memory information" % pnode)
11561
        elif instance_info.fail_msg:
11562
          self.warn.append("Can't get instance runtime information: %s" %
11563
                          instance_info.fail_msg)
11564
        else:
11565
          if instance_info.payload:
11566
            current_mem = int(instance_info.payload["memory"])
11567
          else:
11568
            # Assume instance not running
11569
            # (there is a slight race condition here, but it's not very
11570
            # probable, and we have no other way to check)
11571
            # TODO: Describe race condition
11572
            current_mem = 0
11573
          #TODO(dynmem): do the appropriate check involving MINMEM
11574
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
11575
                      pnhvinfo["memory_free"])
11576
          if miss_mem > 0:
11577
            raise errors.OpPrereqError("This change will prevent the instance"
11578
                                       " from starting, due to %d MB of memory"
11579
                                       " missing on its primary node" %
11580
                                       miss_mem,
11581
                                       errors.ECODE_NORES)
11582

    
11583
      if be_new[constants.BE_AUTO_BALANCE]:
11584
        for node, nres in nodeinfo.items():
11585
          if node not in instance.secondary_nodes:
11586
            continue
11587
          nres.Raise("Can't get info from secondary node %s" % node,
11588
                     prereq=True, ecode=errors.ECODE_STATE)
11589
          (_, _, (nhvinfo, )) = nres.payload
11590
          if not isinstance(nhvinfo.get("memory_free", None), int):
11591
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11592
                                       " memory information" % node,
11593
                                       errors.ECODE_STATE)
11594
          #TODO(dynmem): do the appropriate check involving MINMEM
11595
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
11596
            raise errors.OpPrereqError("This change will prevent the instance"
11597
                                       " from failover to its secondary node"
11598
                                       " %s, due to not enough memory" % node,
11599
                                       errors.ECODE_STATE)
11600

    
11601
    # NIC processing
11602
    self.nic_pnew = {}
11603
    self.nic_pinst = {}
11604
    for nic_op, nic_dict in self.op.nics:
11605
      if nic_op == constants.DDM_REMOVE:
11606
        if not instance.nics:
11607
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11608
                                     errors.ECODE_INVAL)
11609
        continue
11610
      if nic_op != constants.DDM_ADD:
11611
        # an existing nic
11612
        if not instance.nics:
11613
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11614
                                     " no NICs" % nic_op,
11615
                                     errors.ECODE_INVAL)
11616
        if nic_op < 0 or nic_op >= len(instance.nics):
11617
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11618
                                     " are 0 to %d" %
11619
                                     (nic_op, len(instance.nics) - 1),
11620
                                     errors.ECODE_INVAL)
11621
        old_nic_params = instance.nics[nic_op].nicparams
11622
        old_nic_ip = instance.nics[nic_op].ip
11623
      else:
11624
        old_nic_params = {}
11625
        old_nic_ip = None
11626

    
11627
      update_params_dict = dict([(key, nic_dict[key])
11628
                                 for key in constants.NICS_PARAMETERS
11629
                                 if key in nic_dict])
11630

    
11631
      if "bridge" in nic_dict:
11632
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11633

    
11634
      new_nic_params = _GetUpdatedParams(old_nic_params,
11635
                                         update_params_dict)
11636
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11637
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11638
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11639
      self.nic_pinst[nic_op] = new_nic_params
11640
      self.nic_pnew[nic_op] = new_filled_nic_params
11641
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11642

    
11643
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11644
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11645
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11646
        if msg:
11647
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11648
          if self.op.force:
11649
            self.warn.append(msg)
11650
          else:
11651
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11652
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11653
        if constants.INIC_IP in nic_dict:
11654
          nic_ip = nic_dict[constants.INIC_IP]
11655
        else:
11656
          nic_ip = old_nic_ip
11657
        if nic_ip is None:
11658
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11659
                                     " on a routed nic", errors.ECODE_INVAL)
11660
      if constants.INIC_MAC in nic_dict:
11661
        nic_mac = nic_dict[constants.INIC_MAC]
11662
        if nic_mac is None:
11663
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11664
                                     errors.ECODE_INVAL)
11665
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11666
          # otherwise generate the mac
11667
          nic_dict[constants.INIC_MAC] = \
11668
            self.cfg.GenerateMAC(self.proc.GetECId())
11669
        else:
11670
          # or validate/reserve the current one
11671
          try:
11672
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11673
          except errors.ReservationError:
11674
            raise errors.OpPrereqError("MAC address %s already in use"
11675
                                       " in cluster" % nic_mac,
11676
                                       errors.ECODE_NOTUNIQUE)
11677

    
11678
    # DISK processing
11679
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11680
      raise errors.OpPrereqError("Disk operations not supported for"
11681
                                 " diskless instances",
11682
                                 errors.ECODE_INVAL)
11683
    for disk_op, _ in self.op.disks:
11684
      if disk_op == constants.DDM_REMOVE:
11685
        if len(instance.disks) == 1:
11686
          raise errors.OpPrereqError("Cannot remove the last disk of"
11687
                                     " an instance", errors.ECODE_INVAL)
11688
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
11689
                            msg="cannot remove disks")
11690

    
11691
      if (disk_op == constants.DDM_ADD and
11692
          len(instance.disks) >= constants.MAX_DISKS):
11693
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11694
                                   " add more" % constants.MAX_DISKS,
11695
                                   errors.ECODE_STATE)
11696
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11697
        # an existing disk
11698
        if disk_op < 0 or disk_op >= len(instance.disks):
11699
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11700
                                     " are 0 to %d" %
11701
                                     (disk_op, len(instance.disks) - 1),
11702
                                     errors.ECODE_INVAL)
11703

    
11704
    # disabling the instance
11705
    if self.op.offline_inst:
11706
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11707
                          msg="cannot change instance state to offline")
11708

    
11709
    # enabling the instance
11710
    if self.op.online_inst:
11711
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11712
                          msg="cannot make instance go online")
11713

    
11714
  def _ConvertPlainToDrbd(self, feedback_fn):
11715
    """Converts an instance from plain to drbd.
11716

11717
    """
11718
    feedback_fn("Converting template to drbd")
11719
    instance = self.instance
11720
    pnode = instance.primary_node
11721
    snode = self.op.remote_node
11722

    
11723
    assert instance.disk_template == constants.DT_PLAIN
11724

    
11725
    # create a fake disk info for _GenerateDiskTemplate
11726
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11727
                  constants.IDISK_VG: d.logical_id[0]}
11728
                 for d in instance.disks]
11729
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11730
                                      instance.name, pnode, [snode],
11731
                                      disk_info, None, None, 0, feedback_fn,
11732
                                      self.diskparams)
11733
    info = _GetInstanceInfoText(instance)
11734
    feedback_fn("Creating aditional volumes...")
11735
    # first, create the missing data and meta devices
11736
    for disk in new_disks:
11737
      # unfortunately this is... not too nice
11738
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11739
                            info, True)
11740
      for child in disk.children:
11741
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11742
    # at this stage, all new LVs have been created, we can rename the
11743
    # old ones
11744
    feedback_fn("Renaming original volumes...")
11745
    rename_list = [(o, n.children[0].logical_id)
11746
                   for (o, n) in zip(instance.disks, new_disks)]
11747
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11748
    result.Raise("Failed to rename original LVs")
11749

    
11750
    feedback_fn("Initializing DRBD devices...")
11751
    # all child devices are in place, we can now create the DRBD devices
11752
    for disk in new_disks:
11753
      for node in [pnode, snode]:
11754
        f_create = node == pnode
11755
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11756

    
11757
    # at this point, the instance has been modified
11758
    instance.disk_template = constants.DT_DRBD8
11759
    instance.disks = new_disks
11760
    self.cfg.Update(instance, feedback_fn)
11761

    
11762
    # Release node locks while waiting for sync
11763
    _ReleaseLocks(self, locking.LEVEL_NODE)
11764

    
11765
    # disks are created, waiting for sync
11766
    disk_abort = not _WaitForSync(self, instance,
11767
                                  oneshot=not self.op.wait_for_sync)
11768
    if disk_abort:
11769
      raise errors.OpExecError("There are some degraded disks for"
11770
                               " this instance, please cleanup manually")
11771

    
11772
    # Node resource locks will be released by caller
11773

    
11774
  def _ConvertDrbdToPlain(self, feedback_fn):
11775
    """Converts an instance from drbd to plain.
11776

11777
    """
11778
    instance = self.instance
11779

    
11780
    assert len(instance.secondary_nodes) == 1
11781
    assert instance.disk_template == constants.DT_DRBD8
11782

    
11783
    pnode = instance.primary_node
11784
    snode = instance.secondary_nodes[0]
11785
    feedback_fn("Converting template to plain")
11786

    
11787
    old_disks = instance.disks
11788
    new_disks = [d.children[0] for d in old_disks]
11789

    
11790
    # copy over size and mode
11791
    for parent, child in zip(old_disks, new_disks):
11792
      child.size = parent.size
11793
      child.mode = parent.mode
11794

    
11795
    # update instance structure
11796
    instance.disks = new_disks
11797
    instance.disk_template = constants.DT_PLAIN
11798
    self.cfg.Update(instance, feedback_fn)
11799

    
11800
    # Release locks in case removing disks takes a while
11801
    _ReleaseLocks(self, locking.LEVEL_NODE)
11802

    
11803
    feedback_fn("Removing volumes on the secondary node...")
11804
    for disk in old_disks:
11805
      self.cfg.SetDiskID(disk, snode)
11806
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11807
      if msg:
11808
        self.LogWarning("Could not remove block device %s on node %s,"
11809
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11810

    
11811
    feedback_fn("Removing unneeded volumes on the primary node...")
11812
    for idx, disk in enumerate(old_disks):
11813
      meta = disk.children[1]
11814
      self.cfg.SetDiskID(meta, pnode)
11815
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11816
      if msg:
11817
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11818
                        " continuing anyway: %s", idx, pnode, msg)
11819

    
11820
    # this is a DRBD disk, return its port to the pool
11821
    for disk in old_disks:
11822
      tcp_port = disk.logical_id[2]
11823
      self.cfg.AddTcpUdpPort(tcp_port)
11824

    
11825
    # Node resource locks will be released by caller
11826

    
11827
  def Exec(self, feedback_fn):
11828
    """Modifies an instance.
11829

11830
    All parameters take effect only at the next restart of the instance.
11831

11832
    """
11833
    # Process here the warnings from CheckPrereq, as we don't have a
11834
    # feedback_fn there.
11835
    for warn in self.warn:
11836
      feedback_fn("WARNING: %s" % warn)
11837

    
11838
    assert ((self.op.disk_template is None) ^
11839
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11840
      "Not owning any node resource locks"
11841

    
11842
    result = []
11843
    instance = self.instance
11844
    # disk changes
11845
    for disk_op, disk_dict in self.op.disks:
11846
      if disk_op == constants.DDM_REMOVE:
11847
        # remove the last disk
11848
        device = instance.disks.pop()
11849
        device_idx = len(instance.disks)
11850
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11851
          self.cfg.SetDiskID(disk, node)
11852
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11853
          if msg:
11854
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11855
                            " continuing anyway", device_idx, node, msg)
11856
        result.append(("disk/%d" % device_idx, "remove"))
11857

    
11858
        # if this is a DRBD disk, return its port to the pool
11859
        if device.dev_type in constants.LDS_DRBD:
11860
          tcp_port = device.logical_id[2]
11861
          self.cfg.AddTcpUdpPort(tcp_port)
11862
      elif disk_op == constants.DDM_ADD:
11863
        # add a new disk
11864
        if instance.disk_template in (constants.DT_FILE,
11865
                                        constants.DT_SHARED_FILE):
11866
          file_driver, file_path = instance.disks[0].logical_id
11867
          file_path = os.path.dirname(file_path)
11868
        else:
11869
          file_driver = file_path = None
11870
        disk_idx_base = len(instance.disks)
11871
        new_disk = _GenerateDiskTemplate(self,
11872
                                         instance.disk_template,
11873
                                         instance.name, instance.primary_node,
11874
                                         instance.secondary_nodes,
11875
                                         [disk_dict],
11876
                                         file_path,
11877
                                         file_driver,
11878
                                         disk_idx_base,
11879
                                         feedback_fn,
11880
                                         self.diskparams)[0]
11881
        instance.disks.append(new_disk)
11882
        info = _GetInstanceInfoText(instance)
11883

    
11884
        logging.info("Creating volume %s for instance %s",
11885
                     new_disk.iv_name, instance.name)
11886
        # Note: this needs to be kept in sync with _CreateDisks
11887
        #HARDCODE
11888
        for node in instance.all_nodes:
11889
          f_create = node == instance.primary_node
11890
          try:
11891
            _CreateBlockDev(self, node, instance, new_disk,
11892
                            f_create, info, f_create)
11893
          except errors.OpExecError, err:
11894
            self.LogWarning("Failed to create volume %s (%s) on"
11895
                            " node %s: %s",
11896
                            new_disk.iv_name, new_disk, node, err)
11897
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11898
                       (new_disk.size, new_disk.mode)))
11899
      else:
11900
        # change a given disk
11901
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11902
        result.append(("disk.mode/%d" % disk_op,
11903
                       disk_dict[constants.IDISK_MODE]))
11904

    
11905
    if self.op.disk_template:
11906
      if __debug__:
11907
        check_nodes = set(instance.all_nodes)
11908
        if self.op.remote_node:
11909
          check_nodes.add(self.op.remote_node)
11910
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11911
          owned = self.owned_locks(level)
11912
          assert not (check_nodes - owned), \
11913
            ("Not owning the correct locks, owning %r, expected at least %r" %
11914
             (owned, check_nodes))
11915

    
11916
      r_shut = _ShutdownInstanceDisks(self, instance)
11917
      if not r_shut:
11918
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11919
                                 " proceed with disk template conversion")
11920
      mode = (instance.disk_template, self.op.disk_template)
11921
      try:
11922
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11923
      except:
11924
        self.cfg.ReleaseDRBDMinors(instance.name)
11925
        raise
11926
      result.append(("disk_template", self.op.disk_template))
11927

    
11928
      assert instance.disk_template == self.op.disk_template, \
11929
        ("Expected disk template '%s', found '%s'" %
11930
         (self.op.disk_template, instance.disk_template))
11931

    
11932
    # Release node and resource locks if there are any (they might already have
11933
    # been released during disk conversion)
11934
    _ReleaseLocks(self, locking.LEVEL_NODE)
11935
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11936

    
11937
    # NIC changes
11938
    for nic_op, nic_dict in self.op.nics:
11939
      if nic_op == constants.DDM_REMOVE:
11940
        # remove the last nic
11941
        del instance.nics[-1]
11942
        result.append(("nic.%d" % len(instance.nics), "remove"))
11943
      elif nic_op == constants.DDM_ADD:
11944
        # mac and bridge should be set by now
11945
        mac = nic_dict[constants.INIC_MAC]
11946
        ip = nic_dict.get(constants.INIC_IP, None)
11947
        nicparams = self.nic_pinst[constants.DDM_ADD]
11948
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11949
        instance.nics.append(new_nic)
11950
        result.append(("nic.%d" % (len(instance.nics) - 1),
11951
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11952
                       (new_nic.mac, new_nic.ip,
11953
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11954
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11955
                       )))
11956
      else:
11957
        for key in (constants.INIC_MAC, constants.INIC_IP):
11958
          if key in nic_dict:
11959
            setattr(instance.nics[nic_op], key, nic_dict[key])
11960
        if nic_op in self.nic_pinst:
11961
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11962
        for key, val in nic_dict.iteritems():
11963
          result.append(("nic.%s/%d" % (key, nic_op), val))
11964

    
11965
    # hvparams changes
11966
    if self.op.hvparams:
11967
      instance.hvparams = self.hv_inst
11968
      for key, val in self.op.hvparams.iteritems():
11969
        result.append(("hv/%s" % key, val))
11970

    
11971
    # beparams changes
11972
    if self.op.beparams:
11973
      instance.beparams = self.be_inst
11974
      for key, val in self.op.beparams.iteritems():
11975
        result.append(("be/%s" % key, val))
11976

    
11977
    # OS change
11978
    if self.op.os_name:
11979
      instance.os = self.op.os_name
11980

    
11981
    # osparams changes
11982
    if self.op.osparams:
11983
      instance.osparams = self.os_inst
11984
      for key, val in self.op.osparams.iteritems():
11985
        result.append(("os/%s" % key, val))
11986

    
11987
    # online/offline instance
11988
    if self.op.online_inst:
11989
      self.cfg.MarkInstanceDown(instance.name)
11990
      result.append(("admin_state", constants.ADMINST_DOWN))
11991
    if self.op.offline_inst:
11992
      self.cfg.MarkInstanceOffline(instance.name)
11993
      result.append(("admin_state", constants.ADMINST_OFFLINE))
11994

    
11995
    self.cfg.Update(instance, feedback_fn)
11996

    
11997
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11998
                self.owned_locks(locking.LEVEL_NODE)), \
11999
      "All node locks should have been released by now"
12000

    
12001
    return result
12002

    
12003
  _DISK_CONVERSIONS = {
12004
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
12005
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
12006
    }
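
# --- Illustrative sketch (not part of the original module) ---
# The _DISK_CONVERSIONS mapping above dispatches on the
# (current_template, requested_template) pair.  The toy example below shows
# the same dispatch pattern with hypothetical stand-in names; only the idea
# of keying handlers by a template pair is taken from the code above.

def _ExampleConvertAToB(feedback_fn):
  # Hypothetical conversion step; a real handler would create the new disks,
  # copy the data and update the configuration.
  feedback_fn("converting from 'a' to 'b'")
  return "b"

def _ExampleConvertBToA(feedback_fn):
  feedback_fn("converting from 'b' to 'a'")
  return "a"

_EXAMPLE_CONVERSIONS = {
  ("a", "b"): _ExampleConvertAToB,
  ("b", "a"): _ExampleConvertBToA,
  }

def _ExampleRunConversion(current, requested, feedback_fn):
  """Looks up and runs a conversion handler, as Exec() does above."""
  mode = (current, requested)
  if mode not in _EXAMPLE_CONVERSIONS:
    raise ValueError("Unsupported conversion %r" % (mode, ))
  return _EXAMPLE_CONVERSIONS[mode](feedback_fn)

assert _ExampleRunConversion("a", "b", lambda msg: None) == "b"
# --- end of sketch ---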
12007

    
12008

    
12009
class LUInstanceChangeGroup(LogicalUnit):
12010
  HPATH = "instance-change-group"
12011
  HTYPE = constants.HTYPE_INSTANCE
12012
  REQ_BGL = False
12013

    
12014
  def ExpandNames(self):
12015
    self.share_locks = _ShareAll()
12016
    self.needed_locks = {
12017
      locking.LEVEL_NODEGROUP: [],
12018
      locking.LEVEL_NODE: [],
12019
      }
12020

    
12021
    self._ExpandAndLockInstance()
12022

    
12023
    if self.op.target_groups:
12024
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12025
                                  self.op.target_groups)
12026
    else:
12027
      self.req_target_uuids = None
12028

    
12029
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12030

    
12031
  def DeclareLocks(self, level):
12032
    if level == locking.LEVEL_NODEGROUP:
12033
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12034

    
12035
      if self.req_target_uuids:
12036
        lock_groups = set(self.req_target_uuids)
12037

    
12038
        # Lock all groups used by instance optimistically; this requires going
12039
        # via the node before it's locked, requiring verification later on
12040
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
12041
        lock_groups.update(instance_groups)
12042
      else:
12043
        # No target groups, need to lock all of them
12044
        lock_groups = locking.ALL_SET
12045

    
12046
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12047

    
12048
    elif level == locking.LEVEL_NODE:
12049
      if self.req_target_uuids:
12050
        # Lock all nodes used by instances
12051
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12052
        self._LockInstancesNodes()
12053

    
12054
        # Lock all nodes in all potential target groups
12055
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
12056
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
12057
        member_nodes = [node_name
12058
                        for group in lock_groups
12059
                        for node_name in self.cfg.GetNodeGroup(group).members]
12060
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12061
      else:
12062
        # Lock all nodes as all groups are potential targets
12063
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12064

    
12065
  def CheckPrereq(self):
12066
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12067
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12068
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12069

    
12070
    assert (self.req_target_uuids is None or
12071
            owned_groups.issuperset(self.req_target_uuids))
12072
    assert owned_instances == set([self.op.instance_name])
12073

    
12074
    # Get instance information
12075
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
12076

    
12077
    # Check if node groups for locked instance are still correct
12078
    assert owned_nodes.issuperset(self.instance.all_nodes), \
12079
      ("Instance %s's nodes changed while we kept the lock" %
12080
       self.op.instance_name)
12081

    
12082
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
12083
                                           owned_groups)
12084

    
12085
    if self.req_target_uuids:
12086
      # User requested specific target groups
12087
      self.target_uuids = self.req_target_uuids
12088
    else:
12089
      # All groups except those used by the instance are potential targets
12090
      self.target_uuids = owned_groups - inst_groups
12091

    
12092
    conflicting_groups = self.target_uuids & inst_groups
12093
    if conflicting_groups:
12094
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
12095
                                 " used by the instance '%s'" %
12096
                                 (utils.CommaJoin(conflicting_groups),
12097
                                  self.op.instance_name),
12098
                                 errors.ECODE_INVAL)
12099

    
12100
    if not self.target_uuids:
12101
      raise errors.OpPrereqError("There are no possible target groups",
12102
                                 errors.ECODE_INVAL)
12103

    
12104
  def BuildHooksEnv(self):
12105
    """Build hooks env.
12106

12107
    """
12108
    assert self.target_uuids
12109

    
12110
    env = {
12111
      "TARGET_GROUPS": " ".join(self.target_uuids),
12112
      }
12113

    
12114
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12115

    
12116
    return env
12117

    
12118
  def BuildHooksNodes(self):
12119
    """Build hooks nodes.
12120

12121
    """
12122
    mn = self.cfg.GetMasterNode()
12123
    return ([mn], [mn])
12124

    
12125
  def Exec(self, feedback_fn):
12126
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12127

    
12128
    assert instances == [self.op.instance_name], "Instance not locked"
12129

    
12130
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12131
                     instances=instances, target_groups=list(self.target_uuids))
12132

    
12133
    ial.Run(self.op.iallocator)
12134

    
12135
    if not ial.success:
12136
      raise errors.OpPrereqError("Can't compute solution for changing group of"
12137
                                 " instance '%s' using iallocator '%s': %s" %
12138
                                 (self.op.instance_name, self.op.iallocator,
12139
                                  ial.info),
12140
                                 errors.ECODE_NORES)
12141

    
12142
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12143

    
12144
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
12145
                 " instance '%s'", len(jobs), self.op.instance_name)
12146

    
12147
    return ResultWithJobs(jobs)
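
# --- Illustrative sketch (not part of the original module) ---
# CheckPrereq() above derives the candidate target groups with plain set
# arithmetic: groups already used by the instance are never valid targets.
# The group names below are invented for the example.

def _ExampleTargetGroups(owned_groups, inst_groups, requested):
  """Mirrors the target-group selection logic, on plain sets."""
  if requested:
    targets = set(requested)
  else:
    targets = set(owned_groups) - set(inst_groups)
  conflicting = targets & set(inst_groups)
  if conflicting:
    raise ValueError("Group(s) %s are used by the instance" %
                     ", ".join(sorted(conflicting)))
  if not targets:
    raise ValueError("There are no possible target groups")
  return targets

assert _ExampleTargetGroups(["g1", "g2", "g3"], ["g1"], None) == \
  set(["g2", "g3"])
# --- end of sketch ---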
12148

    
12149

    
12150
class LUBackupQuery(NoHooksLU):
12151
  """Query the exports list
12152

12153
  """
12154
  REQ_BGL = False
12155

    
12156
  def ExpandNames(self):
12157
    self.needed_locks = {}
12158
    self.share_locks[locking.LEVEL_NODE] = 1
12159
    if not self.op.nodes:
12160
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12161
    else:
12162
      self.needed_locks[locking.LEVEL_NODE] = \
12163
        _GetWantedNodes(self, self.op.nodes)
12164

    
12165
  def Exec(self, feedback_fn):
12166
    """Compute the list of all the exported system images.
12167

12168
    @rtype: dict
12169
    @return: a dictionary with the structure node->(export-list)
12170
        where export-list is a list of the instances exported on
12171
        that node.
12172

12173
    """
12174
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
12175
    rpcresult = self.rpc.call_export_list(self.nodes)
12176
    result = {}
12177
    for node in rpcresult:
12178
      if rpcresult[node].fail_msg:
12179
        result[node] = False
12180
      else:
12181
        result[node] = rpcresult[node].payload
12182

    
12183
    return result
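
# --- Illustrative sketch (not part of the original module) ---
# Exec() above folds the per-node RPC replies into a single dictionary,
# using False as the per-node value when the call failed.  The tiny fake
# result class and node names below are invented for the example.

class _FakeExportListResult(object):
  def __init__(self, fail_msg, payload):
    self.fail_msg = fail_msg
    self.payload = payload

def _ExampleCollectExports(rpcresult):
  """Builds the node->(export list | False) mapping as Exec() does."""
  result = {}
  for node in rpcresult:
    if rpcresult[node].fail_msg:
      result[node] = False
    else:
      result[node] = rpcresult[node].payload
  return result

assert _ExampleCollectExports({
  "node1": _FakeExportListResult(None, ["inst1", "inst2"]),
  "node2": _FakeExportListResult("connection refused", None),
  }) == {"node1": ["inst1", "inst2"], "node2": False}
# --- end of sketch ---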
12184

    
12185

    
12186
class LUBackupPrepare(NoHooksLU):
12187
  """Prepares an instance for an export and returns useful information.
12188

12189
  """
12190
  REQ_BGL = False
12191

    
12192
  def ExpandNames(self):
12193
    self._ExpandAndLockInstance()
12194

    
12195
  def CheckPrereq(self):
12196
    """Check prerequisites.
12197

12198
    """
12199
    instance_name = self.op.instance_name
12200

    
12201
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12202
    assert self.instance is not None, \
12203
          "Cannot retrieve locked instance %s" % self.op.instance_name
12204
    _CheckNodeOnline(self, self.instance.primary_node)
12205

    
12206
    self._cds = _GetClusterDomainSecret()
12207

    
12208
  def Exec(self, feedback_fn):
12209
    """Prepares an instance for an export.
12210

12211
    """
12212
    instance = self.instance
12213

    
12214
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12215
      salt = utils.GenerateSecret(8)
12216

    
12217
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12218
      result = self.rpc.call_x509_cert_create(instance.primary_node,
12219
                                              constants.RIE_CERT_VALIDITY)
12220
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
12221

    
12222
      (name, cert_pem) = result.payload
12223

    
12224
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12225
                                             cert_pem)
12226

    
12227
      return {
12228
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12229
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12230
                          salt),
12231
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12232
        }
12233

    
12234
    return None
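
# --- Illustrative sketch (not part of the original module) ---
# The remote-export preparation above ties the X509 key name to the cluster
# domain secret with a salted HMAC (utils.Sha1Hmac / utils.VerifySha1Hmac).
# The stdlib sketch below only illustrates the salted-HMAC idea; the exact
# message layout used by the real helpers is not reproduced here.

import hashlib
import hmac as _example_hmac

def _ExampleSha1Hmac(secret, text, salt):
  # Assumption for this sketch: the salt is simply prepended to the message;
  # the real utils.Sha1Hmac may use a different layout.
  return _example_hmac.new(secret, salt + text, hashlib.sha1).hexdigest()

def _ExampleVerifySha1Hmac(secret, text, digest, salt):
  # A production implementation should use a constant-time comparison here.
  return _ExampleSha1Hmac(secret, text, salt) == digest

_example_digest = _ExampleSha1Hmac("cluster-secret", "key-name", "abcd1234")
assert _ExampleVerifySha1Hmac("cluster-secret", "key-name",
                              _example_digest, "abcd1234")
# --- end of sketch ---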
12235

    
12236

    
12237
class LUBackupExport(LogicalUnit):
12238
  """Export an instance to an image in the cluster.
12239

12240
  """
12241
  HPATH = "instance-export"
12242
  HTYPE = constants.HTYPE_INSTANCE
12243
  REQ_BGL = False
12244

    
12245
  def CheckArguments(self):
12246
    """Check the arguments.
12247

12248
    """
12249
    self.x509_key_name = self.op.x509_key_name
12250
    self.dest_x509_ca_pem = self.op.destination_x509_ca
12251

    
12252
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12253
      if not self.x509_key_name:
12254
        raise errors.OpPrereqError("Missing X509 key name for encryption",
12255
                                   errors.ECODE_INVAL)
12256

    
12257
      if not self.dest_x509_ca_pem:
12258
        raise errors.OpPrereqError("Missing destination X509 CA",
12259
                                   errors.ECODE_INVAL)
12260

    
12261
  def ExpandNames(self):
12262
    self._ExpandAndLockInstance()
12263

    
12264
    # Lock all nodes for local exports
12265
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12266
      # FIXME: lock only instance primary and destination node
12267
      #
12268
      # Sad but true, for now we have to lock all nodes, as we don't know where
12269
      # the previous export might be, and in this LU we search for it and
12270
      # remove it from its current node. In the future we could fix this by:
12271
      #  - making a tasklet to search (share-lock all), then create the
12272
      #    new one, then one to remove, after
12273
      #  - removing the removal operation altogether
12274
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12275

    
12276
  def DeclareLocks(self, level):
12277
    """Last minute lock declaration."""
12278
    # All nodes are locked anyway, so nothing to do here.
12279

    
12280
  def BuildHooksEnv(self):
12281
    """Build hooks env.
12282

12283
    This will run on the master, primary node and target node.
12284

12285
    """
12286
    env = {
12287
      "EXPORT_MODE": self.op.mode,
12288
      "EXPORT_NODE": self.op.target_node,
12289
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12290
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12291
      # TODO: Generic function for boolean env variables
12292
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12293
      }
12294

    
12295
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12296

    
12297
    return env
12298

    
12299
  def BuildHooksNodes(self):
12300
    """Build hooks nodes.
12301

12302
    """
12303
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12304

    
12305
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12306
      nl.append(self.op.target_node)
12307

    
12308
    return (nl, nl)
12309

    
12310
  def CheckPrereq(self):
12311
    """Check prerequisites.
12312

12313
    This checks that the instance and node names are valid.
12314

12315
    """
12316
    instance_name = self.op.instance_name
12317

    
12318
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12319
    assert self.instance is not None, \
12320
          "Cannot retrieve locked instance %s" % self.op.instance_name
12321
    _CheckNodeOnline(self, self.instance.primary_node)
12322

    
12323
    if (self.op.remove_instance and
12324
        self.instance.admin_state == constants.ADMINST_UP and
12325
        not self.op.shutdown):
12326
      raise errors.OpPrereqError("Can not remove instance without shutting it"
12327
                                 " down before")
12328

    
12329
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12330
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12331
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12332
      assert self.dst_node is not None
12333

    
12334
      _CheckNodeOnline(self, self.dst_node.name)
12335
      _CheckNodeNotDrained(self, self.dst_node.name)
12336

    
12337
      self._cds = None
12338
      self.dest_disk_info = None
12339
      self.dest_x509_ca = None
12340

    
12341
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12342
      self.dst_node = None
12343

    
12344
      if len(self.op.target_node) != len(self.instance.disks):
12345
        raise errors.OpPrereqError(("Received destination information for %s"
12346
                                    " disks, but instance %s has %s disks") %
12347
                                   (len(self.op.target_node), instance_name,
12348
                                    len(self.instance.disks)),
12349
                                   errors.ECODE_INVAL)
12350

    
12351
      cds = _GetClusterDomainSecret()
12352

    
12353
      # Check X509 key name
12354
      try:
12355
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12356
      except (TypeError, ValueError), err:
12357
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12358

    
12359
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12360
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12361
                                   errors.ECODE_INVAL)
12362

    
12363
      # Load and verify CA
12364
      try:
12365
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12366
      except OpenSSL.crypto.Error, err:
12367
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12368
                                   (err, ), errors.ECODE_INVAL)
12369

    
12370
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12371
      if errcode is not None:
12372
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12373
                                   (msg, ), errors.ECODE_INVAL)
12374

    
12375
      self.dest_x509_ca = cert
12376

    
12377
      # Verify target information
12378
      disk_info = []
12379
      for idx, disk_data in enumerate(self.op.target_node):
12380
        try:
12381
          (host, port, magic) = \
12382
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12383
        except errors.GenericError, err:
12384
          raise errors.OpPrereqError("Target info for disk %s: %s" %
12385
                                     (idx, err), errors.ECODE_INVAL)
12386

    
12387
        disk_info.append((host, port, magic))
12388

    
12389
      assert len(disk_info) == len(self.op.target_node)
12390
      self.dest_disk_info = disk_info
12391

    
12392
    else:
12393
      raise errors.ProgrammerError("Unhandled export mode %r" %
12394
                                   self.op.mode)
12395

    
12396
    # instance disk type verification
12397
    # TODO: Implement export support for file-based disks
12398
    for disk in self.instance.disks:
12399
      if disk.dev_type == constants.LD_FILE:
12400
        raise errors.OpPrereqError("Export not supported for instances with"
12401
                                   " file-based disks", errors.ECODE_INVAL)
12402

    
12403
  def _CleanupExports(self, feedback_fn):
12404
    """Removes exports of current instance from all other nodes.
12405

12406
    If an instance in a cluster with nodes A..D was exported to node C, its
12407
    exports will be removed from the nodes A, B and D.
12408

12409
    """
12410
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12411

    
12412
    nodelist = self.cfg.GetNodeList()
12413
    nodelist.remove(self.dst_node.name)
12414

    
12415
    # on one-node clusters nodelist will be empty after the removal
12416
    # if we proceed the backup would be removed because OpBackupQuery
12417
    # substitutes an empty list with the full cluster node list.
12418
    iname = self.instance.name
12419
    if nodelist:
12420
      feedback_fn("Removing old exports for instance %s" % iname)
12421
      exportlist = self.rpc.call_export_list(nodelist)
12422
      for node in exportlist:
12423
        if exportlist[node].fail_msg:
12424
          continue
12425
        if iname in exportlist[node].payload:
12426
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12427
          if msg:
12428
            self.LogWarning("Could not remove older export for instance %s"
12429
                            " on node %s: %s", iname, node, msg)
12430

    
12431
  def Exec(self, feedback_fn):
12432
    """Export an instance to an image in the cluster.
12433

12434
    """
12435
    assert self.op.mode in constants.EXPORT_MODES
12436

    
12437
    instance = self.instance
12438
    src_node = instance.primary_node
12439

    
12440
    if self.op.shutdown:
12441
      # shutdown the instance, but not the disks
12442
      feedback_fn("Shutting down instance %s" % instance.name)
12443
      result = self.rpc.call_instance_shutdown(src_node, instance,
12444
                                               self.op.shutdown_timeout)
12445
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12446
      result.Raise("Could not shutdown instance %s on"
12447
                   " node %s" % (instance.name, src_node))
12448

    
12449
    # set the disks ID correctly since call_instance_start needs the
12450
    # correct drbd minor to create the symlinks
12451
    for disk in instance.disks:
12452
      self.cfg.SetDiskID(disk, src_node)
12453

    
12454
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
12455

    
12456
    if activate_disks:
12457
      # Activate the instance disks if we're exporting a stopped instance
12458
      feedback_fn("Activating disks for %s" % instance.name)
12459
      _StartInstanceDisks(self, instance, None)
12460

    
12461
    try:
12462
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12463
                                                     instance)
12464

    
12465
      helper.CreateSnapshots()
12466
      try:
12467
        if (self.op.shutdown and
12468
            instance.admin_state == constants.ADMINST_UP and
12469
            not self.op.remove_instance):
12470
          assert not activate_disks
12471
          feedback_fn("Starting instance %s" % instance.name)
12472
          result = self.rpc.call_instance_start(src_node,
12473
                                                (instance, None, None), False)
12474
          msg = result.fail_msg
12475
          if msg:
12476
            feedback_fn("Failed to start instance: %s" % msg)
12477
            _ShutdownInstanceDisks(self, instance)
12478
            raise errors.OpExecError("Could not start instance: %s" % msg)
12479

    
12480
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12481
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12482
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12483
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12484
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12485

    
12486
          (key_name, _, _) = self.x509_key_name
12487

    
12488
          dest_ca_pem = \
12489
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12490
                                            self.dest_x509_ca)
12491

    
12492
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12493
                                                     key_name, dest_ca_pem,
12494
                                                     timeouts)
12495
      finally:
12496
        helper.Cleanup()
12497

    
12498
      # Check for backwards compatibility
12499
      assert len(dresults) == len(instance.disks)
12500
      assert compat.all(isinstance(i, bool) for i in dresults), \
12501
             "Not all results are boolean: %r" % dresults
12502

    
12503
    finally:
12504
      if activate_disks:
12505
        feedback_fn("Deactivating disks for %s" % instance.name)
12506
        _ShutdownInstanceDisks(self, instance)
12507

    
12508
    if not (compat.all(dresults) and fin_resu):
12509
      failures = []
12510
      if not fin_resu:
12511
        failures.append("export finalization")
12512
      if not compat.all(dresults):
12513
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12514
                               if not dsk)
12515
        failures.append("disk export: disk(s) %s" % fdsk)
12516

    
12517
      raise errors.OpExecError("Export failed, errors in %s" %
12518
                               utils.CommaJoin(failures))
12519

    
12520
    # At this point, the export was successful, we can cleanup/finish
12521

    
12522
    # Remove instance if requested
12523
    if self.op.remove_instance:
12524
      feedback_fn("Removing instance %s" % instance.name)
12525
      _RemoveInstance(self, feedback_fn, instance,
12526
                      self.op.ignore_remove_failures)
12527

    
12528
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12529
      self._CleanupExports(feedback_fn)
12530

    
12531
    return fin_resu, dresults
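
# --- Illustrative sketch (not part of the original module) ---
# Exec() above reports failures by combining the finalization status with
# the per-disk boolean results.  The helper below shows the same
# summarization on plain values; it is invented for the example.

def _ExampleSummarizeExport(fin_resu, dresults):
  """Returns None on success or a human-readable failure summary."""
  if fin_resu and all(dresults):
    return None
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  if not all(dresults):
    failed = [str(idx) for (idx, dsk) in enumerate(dresults) if not dsk]
    failures.append("disk export: disk(s) %s" % ", ".join(failed))
  return "Export failed, errors in %s" % ", ".join(failures)

assert _ExampleSummarizeExport(True, [True, True]) is None
assert _ExampleSummarizeExport(True, [True, False]) == \
  "Export failed, errors in disk export: disk(s) 1"
# --- end of sketch ---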
12532

    
12533

    
12534
class LUBackupRemove(NoHooksLU):
12535
  """Remove exports related to the named instance.
12536

12537
  """
12538
  REQ_BGL = False
12539

    
12540
  def ExpandNames(self):
12541
    self.needed_locks = {}
12542
    # We need all nodes to be locked in order for RemoveExport to work, but we
12543
    # don't need to lock the instance itself, as nothing will happen to it (and
12544
    # we can remove exports also for a removed instance)
12545
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12546

    
12547
  def Exec(self, feedback_fn):
12548
    """Remove any export.
12549

12550
    """
12551
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12552
    # If the instance was not found we'll try with the name that was passed in.
12553
    # This will only work if it was an FQDN, though.
12554
    fqdn_warn = False
12555
    if not instance_name:
12556
      fqdn_warn = True
12557
      instance_name = self.op.instance_name
12558

    
12559
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12560
    exportlist = self.rpc.call_export_list(locked_nodes)
12561
    found = False
12562
    for node in exportlist:
12563
      msg = exportlist[node].fail_msg
12564
      if msg:
12565
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12566
        continue
12567
      if instance_name in exportlist[node].payload:
12568
        found = True
12569
        result = self.rpc.call_export_remove(node, instance_name)
12570
        msg = result.fail_msg
12571
        if msg:
12572
          logging.error("Could not remove export for instance %s"
12573
                        " on node %s: %s", instance_name, node, msg)
12574

    
12575
    if fqdn_warn and not found:
12576
      feedback_fn("Export not found. If trying to remove an export belonging"
12577
                  " to a deleted instance please use its Fully Qualified"
12578
                  " Domain Name.")
12579

    
12580

    
12581
class LUGroupAdd(LogicalUnit):
12582
  """Logical unit for creating node groups.
12583

12584
  """
12585
  HPATH = "group-add"
12586
  HTYPE = constants.HTYPE_GROUP
12587
  REQ_BGL = False
12588

    
12589
  def ExpandNames(self):
12590
    # We need the new group's UUID here so that we can create and acquire the
12591
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12592
    # that it should not check whether the UUID exists in the configuration.
12593
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12594
    self.needed_locks = {}
12595
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12596

    
12597
  def CheckPrereq(self):
12598
    """Check prerequisites.
12599

12600
    This checks that the given group name is not an existing node group
12601
    already.
12602

12603
    """
12604
    try:
12605
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12606
    except errors.OpPrereqError:
12607
      pass
12608
    else:
12609
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12610
                                 " node group (UUID: %s)" %
12611
                                 (self.op.group_name, existing_uuid),
12612
                                 errors.ECODE_EXISTS)
12613

    
12614
    if self.op.ndparams:
12615
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12616

    
12617
    if self.op.diskparams:
12618
      for templ in constants.DISK_TEMPLATES:
12619
        if templ not in self.op.diskparams:
12620
          self.op.diskparams[templ] = {}
12621
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12622
    else:
12623
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12624

    
12625
  def BuildHooksEnv(self):
12626
    """Build hooks env.
12627

12628
    """
12629
    return {
12630
      "GROUP_NAME": self.op.group_name,
12631
      }
12632

    
12633
  def BuildHooksNodes(self):
12634
    """Build hooks nodes.
12635

12636
    """
12637
    mn = self.cfg.GetMasterNode()
12638
    return ([mn], [mn])
12639

    
12640
  def Exec(self, feedback_fn):
12641
    """Add the node group to the cluster.
12642

12643
    """
12644
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12645
                                  uuid=self.group_uuid,
12646
                                  alloc_policy=self.op.alloc_policy,
12647
                                  ndparams=self.op.ndparams,
12648
                                  diskparams=self.op.diskparams)
12649

    
12650
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12651
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12652

    
12653

    
12654
class LUGroupAssignNodes(NoHooksLU):
12655
  """Logical unit for assigning nodes to groups.
12656

12657
  """
12658
  REQ_BGL = False
12659

    
12660
  def ExpandNames(self):
12661
    # These raise errors.OpPrereqError on their own:
12662
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12663
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12664

    
12665
    # We want to lock all the affected nodes and groups. We have readily
12666
    # available the list of nodes, and the *destination* group. To gather the
12667
    # list of "source" groups, we need to fetch node information later on.
12668
    self.needed_locks = {
12669
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12670
      locking.LEVEL_NODE: self.op.nodes,
12671
      }
12672

    
12673
  def DeclareLocks(self, level):
12674
    if level == locking.LEVEL_NODEGROUP:
12675
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12676

    
12677
      # Try to get all affected nodes' groups without having the group or node
12678
      # lock yet. Needs verification later in the code flow.
12679
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12680

    
12681
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12682

    
12683
  def CheckPrereq(self):
12684
    """Check prerequisites.
12685

12686
    """
12687
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12688
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12689
            frozenset(self.op.nodes))
12690

    
12691
    expected_locks = (set([self.group_uuid]) |
12692
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12693
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12694
    if actual_locks != expected_locks:
12695
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12696
                               " current groups are '%s', used to be '%s'" %
12697
                               (utils.CommaJoin(expected_locks),
12698
                                utils.CommaJoin(actual_locks)))
12699

    
12700
    self.node_data = self.cfg.GetAllNodesInfo()
12701
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12702
    instance_data = self.cfg.GetAllInstancesInfo()
12703

    
12704
    if self.group is None:
12705
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12706
                               (self.op.group_name, self.group_uuid))
12707

    
12708
    (new_splits, previous_splits) = \
12709
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12710
                                             for node in self.op.nodes],
12711
                                            self.node_data, instance_data)
12712

    
12713
    if new_splits:
12714
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12715

    
12716
      if not self.op.force:
12717
        raise errors.OpExecError("The following instances get split by this"
12718
                                 " change and --force was not given: %s" %
12719
                                 fmt_new_splits)
12720
      else:
12721
        self.LogWarning("This operation will split the following instances: %s",
12722
                        fmt_new_splits)
12723

    
12724
        if previous_splits:
12725
          self.LogWarning("In addition, these already-split instances continue"
12726
                          " to be split across groups: %s",
12727
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12728

    
12729
  def Exec(self, feedback_fn):
12730
    """Assign nodes to a new group.
12731

12732
    """
12733
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12734

    
12735
    self.cfg.AssignGroupNodes(mods)
12736

    
12737
  @staticmethod
12738
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12739
    """Check for split instances after a node assignment.
12740

12741
    This method considers a series of node assignments as an atomic operation,
12742
    and returns information about split instances after applying the set of
12743
    changes.
12744

12745
    In particular, it returns information about newly split instances, and
12746
    instances that were already split, and remain so after the change.
12747

12748
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12749
    considered.
12750

12751
    @type changes: list of (node_name, new_group_uuid) pairs.
12752
    @param changes: list of node assignments to consider.
12753
    @param node_data: a dict with data for all nodes
12754
    @param instance_data: a dict with all instances to consider
12755
    @rtype: a two-tuple
12756
    @return: a list of instances that were previously okay and result split as a
12757
      consequence of this change, and a list of instances that were previously
12758
      split and this change does not fix.
12759

12760
    """
12761
    changed_nodes = dict((node, group) for node, group in changes
12762
                         if node_data[node].group != group)
12763

    
12764
    all_split_instances = set()
12765
    previously_split_instances = set()
12766

    
12767
    def InstanceNodes(instance):
12768
      return [instance.primary_node] + list(instance.secondary_nodes)
12769

    
12770
    for inst in instance_data.values():
12771
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12772
        continue
12773

    
12774
      instance_nodes = InstanceNodes(inst)
12775

    
12776
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12777
        previously_split_instances.add(inst.name)
12778

    
12779
      if len(set(changed_nodes.get(node, node_data[node].group)
12780
                 for node in instance_nodes)) > 1:
12781
        all_split_instances.add(inst.name)
12782

    
12783
    return (list(all_split_instances - previously_split_instances),
12784
            list(previously_split_instances & all_split_instances))
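
# --- Illustrative sketch (not part of the original module) ---
# CheckAssignmentForSplitInstances() above works purely on dictionaries, so
# its logic can be shown with toy data: nodes are mapped to their current
# group, instances to their node list.  All names below are invented.

def _ExampleSplitInstances(changes, node_to_group, instance_to_nodes):
  """Same new-split/still-split computation, on plain dicts."""
  changed = dict((node, group) for (node, group) in changes
                 if node_to_group[node] != group)
  all_split = set()
  previously_split = set()
  for (inst, nodes) in instance_to_nodes.items():
    if len(set(node_to_group[node] for node in nodes)) > 1:
      previously_split.add(inst)
    if len(set(changed.get(node, node_to_group[node])
               for node in nodes)) > 1:
      all_split.add(inst)
  return (sorted(all_split - previously_split),
          sorted(previously_split & all_split))

# Moving node2 to group "g2" splits inst1 (its nodes end up in g1 and g2).
assert _ExampleSplitInstances([("node2", "g2")],
                              {"node1": "g1", "node2": "g1"},
                              {"inst1": ["node1", "node2"]}) == \
  (["inst1"], [])
# --- end of sketch ---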
12785

    
12786

    
12787
class _GroupQuery(_QueryBase):
12788
  FIELDS = query.GROUP_FIELDS
12789

    
12790
  def ExpandNames(self, lu):
12791
    lu.needed_locks = {}
12792

    
12793
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12794
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12795

    
12796
    if not self.names:
12797
      self.wanted = [name_to_uuid[name]
12798
                     for name in utils.NiceSort(name_to_uuid.keys())]
12799
    else:
12800
      # Accept names to be either names or UUIDs.
12801
      missing = []
12802
      self.wanted = []
12803
      all_uuid = frozenset(self._all_groups.keys())
12804

    
12805
      for name in self.names:
12806
        if name in all_uuid:
12807
          self.wanted.append(name)
12808
        elif name in name_to_uuid:
12809
          self.wanted.append(name_to_uuid[name])
12810
        else:
12811
          missing.append(name)
12812

    
12813
      if missing:
12814
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12815
                                   utils.CommaJoin(missing),
12816
                                   errors.ECODE_NOENT)
12817

    
12818
  def DeclareLocks(self, lu, level):
12819
    pass
12820

    
12821
  def _GetQueryData(self, lu):
12822
    """Computes the list of node groups and their attributes.
12823

12824
    """
12825
    do_nodes = query.GQ_NODE in self.requested_data
12826
    do_instances = query.GQ_INST in self.requested_data
12827

    
12828
    group_to_nodes = None
12829
    group_to_instances = None
12830

    
12831
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12832
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12833
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12834
    # instance->node. Hence, we will need to process nodes even if we only need
12835
    # instance information.
12836
    if do_nodes or do_instances:
12837
      all_nodes = lu.cfg.GetAllNodesInfo()
12838
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12839
      node_to_group = {}
12840

    
12841
      for node in all_nodes.values():
12842
        if node.group in group_to_nodes:
12843
          group_to_nodes[node.group].append(node.name)
12844
          node_to_group[node.name] = node.group
12845

    
12846
      if do_instances:
12847
        all_instances = lu.cfg.GetAllInstancesInfo()
12848
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12849

    
12850
        for instance in all_instances.values():
12851
          node = instance.primary_node
12852
          if node in node_to_group:
12853
            group_to_instances[node_to_group[node]].append(instance.name)
12854

    
12855
        if not do_nodes:
12856
          # Do not pass on node information if it was not requested.
12857
          group_to_nodes = None
12858

    
12859
    return query.GroupQueryData([self._all_groups[uuid]
12860
                                 for uuid in self.wanted],
12861
                                group_to_nodes, group_to_instances)
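
# --- Illustrative sketch (not part of the original module) ---
# _GetQueryData() above first maps nodes to their group and then assigns
# each instance to a group through its primary node.  The helper below does
# the same two passes on plain dictionaries; all names are invented.

def _ExampleGroupMaps(wanted_groups, node_to_group, inst_to_primary):
  """Returns (group->nodes, group->instances) for the wanted groups."""
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  for (node, group) in node_to_group.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)
  group_to_instances = dict((uuid, []) for uuid in wanted_groups)
  for (inst, pnode) in inst_to_primary.items():
    group = node_to_group.get(pnode)
    if group in group_to_instances:
      group_to_instances[group].append(inst)
  return (group_to_nodes, group_to_instances)

assert _ExampleGroupMaps(["g1"],
                         {"node1": "g1", "node2": "g2"},
                         {"inst1": "node1", "inst2": "node2"}) == \
  ({"g1": ["node1"]}, {"g1": ["inst1"]})
# --- end of sketch ---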
12862

    
12863

    
12864
class LUGroupQuery(NoHooksLU):
12865
  """Logical unit for querying node groups.
12866

12867
  """
12868
  REQ_BGL = False
12869

    
12870
  def CheckArguments(self):
12871
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12872
                          self.op.output_fields, False)
12873

    
12874
  def ExpandNames(self):
12875
    self.gq.ExpandNames(self)
12876

    
12877
  def DeclareLocks(self, level):
12878
    self.gq.DeclareLocks(self, level)
12879

    
12880
  def Exec(self, feedback_fn):
12881
    return self.gq.OldStyleQuery(self)
12882

    
12883

    
12884
class LUGroupSetParams(LogicalUnit):
12885
  """Modifies the parameters of a node group.
12886

12887
  """
12888
  HPATH = "group-modify"
12889
  HTYPE = constants.HTYPE_GROUP
12890
  REQ_BGL = False
12891

    
12892
  def CheckArguments(self):
12893
    all_changes = [
12894
      self.op.ndparams,
12895
      self.op.diskparams,
12896
      self.op.alloc_policy,
12897
      ]
12898

    
12899
    if all_changes.count(None) == len(all_changes):
12900
      raise errors.OpPrereqError("Please pass at least one modification",
12901
                                 errors.ECODE_INVAL)
12902

    
12903
  def ExpandNames(self):
12904
    # This raises errors.OpPrereqError on its own:
12905
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12906

    
12907
    self.needed_locks = {
12908
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12909
      }
12910

    
12911
  def CheckPrereq(self):
12912
    """Check prerequisites.
12913

12914
    """
12915
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12916

    
12917
    if self.group is None:
12918
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12919
                               (self.op.group_name, self.group_uuid))
12920

    
12921
    if self.op.ndparams:
12922
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12923
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12924
      self.new_ndparams = new_ndparams
12925

    
12926
    if self.op.diskparams:
12927
      self.new_diskparams = dict()
12928
      for templ in constants.DISK_TEMPLATES:
12929
        if templ not in self.op.diskparams:
12930
          self.op.diskparams[templ] = {}
12931
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
12932
                                             self.op.diskparams[templ])
12933
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
12934
        self.new_diskparams[templ] = new_templ_params
12935

    
12936
  def BuildHooksEnv(self):
12937
    """Build hooks env.
12938

12939
    """
12940
    return {
12941
      "GROUP_NAME": self.op.group_name,
12942
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12943
      }
12944

    
12945
  def BuildHooksNodes(self):
12946
    """Build hooks nodes.
12947

12948
    """
12949
    mn = self.cfg.GetMasterNode()
12950
    return ([mn], [mn])
12951

    
12952
  def Exec(self, feedback_fn):
12953
    """Modifies the node group.
12954

12955
    """
12956
    result = []
12957

    
12958
    if self.op.ndparams:
12959
      self.group.ndparams = self.new_ndparams
12960
      result.append(("ndparams", str(self.group.ndparams)))
12961

    
12962
    if self.op.diskparams:
12963
      self.group.diskparams = self.new_diskparams
12964
      result.append(("diskparams", str(self.group.diskparams)))
12965

    
12966
    if self.op.alloc_policy:
12967
      self.group.alloc_policy = self.op.alloc_policy
12968

    
12969
    self.cfg.Update(self.group, feedback_fn)
12970
    return result
12971

    
12972

    
12973
class LUGroupRemove(LogicalUnit):
12974
  HPATH = "group-remove"
12975
  HTYPE = constants.HTYPE_GROUP
12976
  REQ_BGL = False
12977

    
12978
  def ExpandNames(self):
12979
    # This raises errors.OpPrereqError on its own:
12980
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12981
    self.needed_locks = {
12982
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12983
      }
12984

    
12985
  def CheckPrereq(self):
12986
    """Check prerequisites.
12987

12988
    This checks that the given group name exists as a node group, that it is
12989
    empty (i.e., contains no nodes), and that it is not the last group of the
12990
    cluster.
12991

12992
    """
12993
    # Verify that the group is empty.
12994
    group_nodes = [node.name
12995
                   for node in self.cfg.GetAllNodesInfo().values()
12996
                   if node.group == self.group_uuid]
12997

    
12998
    if group_nodes:
12999
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
13000
                                 " nodes: %s" %
13001
                                 (self.op.group_name,
13002
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
13003
                                 errors.ECODE_STATE)
13004

    
13005
    # Verify the cluster would not be left group-less.
13006
    if len(self.cfg.GetNodeGroupList()) == 1:
13007
      raise errors.OpPrereqError("Group '%s' is the only group,"
13008
                                 " cannot be removed" %
13009
                                 self.op.group_name,
13010
                                 errors.ECODE_STATE)
13011

    
13012
  def BuildHooksEnv(self):
13013
    """Build hooks env.
13014

13015
    """
13016
    return {
13017
      "GROUP_NAME": self.op.group_name,
13018
      }
13019

    
13020
  def BuildHooksNodes(self):
13021
    """Build hooks nodes.
13022

13023
    """
13024
    mn = self.cfg.GetMasterNode()
13025
    return ([mn], [mn])
13026

    
13027
  def Exec(self, feedback_fn):
13028
    """Remove the node group.
13029

13030
    """
13031
    try:
13032
      self.cfg.RemoveNodeGroup(self.group_uuid)
13033
    except errors.ConfigurationError:
13034
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
13035
                               (self.op.group_name, self.group_uuid))
13036

    
13037
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
13038

    
13039

    
13040
class LUGroupRename(LogicalUnit):
13041
  HPATH = "group-rename"
13042
  HTYPE = constants.HTYPE_GROUP
13043
  REQ_BGL = False
13044

    
13045
  def ExpandNames(self):
13046
    # This raises errors.OpPrereqError on its own:
13047
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13048

    
13049
    self.needed_locks = {
13050
      locking.LEVEL_NODEGROUP: [self.group_uuid],
13051
      }
13052

    
13053
  def CheckPrereq(self):
13054
    """Check prerequisites.
13055

13056
    Ensures requested new name is not yet used.
13057

13058
    """
13059
    try:
13060
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13061
    except errors.OpPrereqError:
13062
      pass
13063
    else:
13064
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13065
                                 " node group (UUID: %s)" %
13066
                                 (self.op.new_name, new_name_uuid),
13067
                                 errors.ECODE_EXISTS)
13068

    
13069
  def BuildHooksEnv(self):
13070
    """Build hooks env.
13071

13072
    """
13073
    return {
13074
      "OLD_NAME": self.op.group_name,
13075
      "NEW_NAME": self.op.new_name,
13076
      }
13077

    
13078
  def BuildHooksNodes(self):
13079
    """Build hooks nodes.
13080

13081
    """
13082
    mn = self.cfg.GetMasterNode()
13083

    
13084
    all_nodes = self.cfg.GetAllNodesInfo()
13085
    all_nodes.pop(mn, None)
13086

    
13087
    run_nodes = [mn]
13088
    run_nodes.extend(node.name for node in all_nodes.values()
13089
                     if node.group == self.group_uuid)
13090

    
13091
    return (run_nodes, run_nodes)
13092

    
13093
  def Exec(self, feedback_fn):
13094
    """Rename the node group.
13095

13096
    """
13097
    group = self.cfg.GetNodeGroup(self.group_uuid)
13098

    
13099
    if group is None:
13100
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13101
                               (self.op.group_name, self.group_uuid))
13102

    
13103
    group.name = self.op.new_name
13104
    self.cfg.Update(group, feedback_fn)
13105

    
13106
    return self.op.new_name
13107

    
13108

    
13109
class LUGroupEvacuate(LogicalUnit):
13110
  HPATH = "group-evacuate"
13111
  HTYPE = constants.HTYPE_GROUP
13112
  REQ_BGL = False
13113

    
13114
  def ExpandNames(self):
13115
    # This raises errors.OpPrereqError on its own:
13116
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13117

    
13118
    if self.op.target_groups:
13119
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13120
                                  self.op.target_groups)
13121
    else:
13122
      self.req_target_uuids = []
13123

    
13124
    if self.group_uuid in self.req_target_uuids:
13125
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13126
                                 " as a target group (targets are %s)" %
13127
                                 (self.group_uuid,
13128
                                  utils.CommaJoin(self.req_target_uuids)),
13129
                                 errors.ECODE_INVAL)
13130

    
13131
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13132

    
13133
    self.share_locks = _ShareAll()
13134
    self.needed_locks = {
13135
      locking.LEVEL_INSTANCE: [],
13136
      locking.LEVEL_NODEGROUP: [],
13137
      locking.LEVEL_NODE: [],
13138
      }
13139

    
13140
  def DeclareLocks(self, level):
13141
    if level == locking.LEVEL_INSTANCE:
13142
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
13143

    
13144
      # Lock instances optimistically, needs verification once node and group
13145
      # locks have been acquired
13146
      self.needed_locks[locking.LEVEL_INSTANCE] = \
13147
        self.cfg.GetNodeGroupInstances(self.group_uuid)
13148

    
13149
    elif level == locking.LEVEL_NODEGROUP:
13150
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13151

    
13152
      if self.req_target_uuids:
13153
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
13154

    
13155
        # Lock all groups used by instances optimistically; this requires going
13156
        # via the node before it's locked, requiring verification later on
13157
        lock_groups.update(group_uuid
13158
                           for instance_name in
13159
                             self.owned_locks(locking.LEVEL_INSTANCE)
13160
                           for group_uuid in
13161
                             self.cfg.GetInstanceNodeGroups(instance_name))
13162
      else:
13163
        # No target groups, need to lock all of them
13164
        lock_groups = locking.ALL_SET
13165

    
13166
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13167

    
13168
    elif level == locking.LEVEL_NODE:
13169
      # This will only lock the nodes in the group to be evacuated which
13170
      # contain actual instances
13171
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13172
      self._LockInstancesNodes()
13173

    
13174
      # Lock all nodes in group to be evacuated and target groups
13175
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13176
      assert self.group_uuid in owned_groups
13177
      member_nodes = [node_name
13178
                      for group in owned_groups
13179
                      for node_name in self.cfg.GetNodeGroup(group).members]
13180
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13181

    
13182
  def CheckPrereq(self):
13183
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13184
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13185
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13186

    
13187
    assert owned_groups.issuperset(self.req_target_uuids)
13188
    assert self.group_uuid in owned_groups
13189

    
13190
    # Check if locked instances are still correct
13191
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13192

    
13193
    # Get instance information
13194
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13195

    
13196
    # Check if node groups for locked instances are still correct
13197
    for instance_name in owned_instances:
13198
      inst = self.instances[instance_name]
13199
      assert owned_nodes.issuperset(inst.all_nodes), \
13200
        "Instance %s's nodes changed while we kept the lock" % instance_name
13201

    
13202
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13203
                                             owned_groups)
13204

    
13205
      assert self.group_uuid in inst_groups, \
13206
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13207

    
13208
    if self.req_target_uuids:
13209
      # User requested specific target groups
13210
      self.target_uuids = self.req_target_uuids
13211
    else:
13212
      # All groups except the one to be evacuated are potential targets
13213
      self.target_uuids = [group_uuid for group_uuid in owned_groups
13214
                           if group_uuid != self.group_uuid]
13215

    
13216
      if not self.target_uuids:
13217
        raise errors.OpPrereqError("There are no possible target groups",
13218
                                   errors.ECODE_INVAL)
13219

    
13220
  def BuildHooksEnv(self):
13221
    """Build hooks env.
13222

13223
    """
13224
    return {
13225
      "GROUP_NAME": self.op.group_name,
13226
      "TARGET_GROUPS": " ".join(self.target_uuids),
13227
      }
13228

    
13229
  def BuildHooksNodes(self):
13230
    """Build hooks nodes.
13231

13232
    """
13233
    mn = self.cfg.GetMasterNode()
13234

    
13235
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13236

    
13237
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13238

    
13239
    return (run_nodes, run_nodes)
13240

    
13241
  def Exec(self, feedback_fn):
13242
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13243

    
13244
    assert self.group_uuid not in self.target_uuids
13245

    
13246
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13247
                     instances=instances, target_groups=self.target_uuids)
13248

    
13249
    ial.Run(self.op.iallocator)
13250

    
13251
    if not ial.success:
13252
      raise errors.OpPrereqError("Can't compute group evacuation using"
13253
                                 " iallocator '%s': %s" %
13254
                                 (self.op.iallocator, ial.info),
13255
                                 errors.ECODE_NORES)
13256

    
13257
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13258

    
13259
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13260
                 len(jobs), self.op.group_name)
13261

    
13262
    return ResultWithJobs(jobs)
13263

    
13264

    
13265
class TagsLU(NoHooksLU): # pylint: disable=W0223
13266
  """Generic tags LU.
13267

13268
  This is an abstract class which is the parent of all the other tags LUs.
13269

13270
  """
13271
  def ExpandNames(self):
13272
    self.group_uuid = None
13273
    self.needed_locks = {}
13274
    if self.op.kind == constants.TAG_NODE:
13275
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13276
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
13277
    elif self.op.kind == constants.TAG_INSTANCE:
13278
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13279
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13280
    elif self.op.kind == constants.TAG_NODEGROUP:
13281
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13282

    
13283
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13284
    # not possible to acquire the BGL based on opcode parameters)
13285

    
13286
  def CheckPrereq(self):
13287
    """Check prerequisites.
13288

13289
    """
13290
    if self.op.kind == constants.TAG_CLUSTER:
13291
      self.target = self.cfg.GetClusterInfo()
13292
    elif self.op.kind == constants.TAG_NODE:
13293
      self.target = self.cfg.GetNodeInfo(self.op.name)
13294
    elif self.op.kind == constants.TAG_INSTANCE:
13295
      self.target = self.cfg.GetInstanceInfo(self.op.name)
13296
    elif self.op.kind == constants.TAG_NODEGROUP:
13297
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
13298
    else:
13299
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13300
                                 str(self.op.kind), errors.ECODE_INVAL)
13301

    
13302

    
13303
class LUTagsGet(TagsLU):
13304
  """Returns the tags of a given object.
13305

13306
  """
13307
  REQ_BGL = False
13308

    
13309
  def ExpandNames(self):
13310
    TagsLU.ExpandNames(self)
13311

    
13312
    # Share locks as this is only a read operation
13313
    self.share_locks = _ShareAll()
13314

    
13315
  def Exec(self, feedback_fn):
13316
    """Returns the tag list.
13317

13318
    """
13319
    return list(self.target.GetTags())
13320

    
13321

    
13322
class LUTagsSearch(NoHooksLU):
13323
  """Searches the tags for a given pattern.
13324

13325
  """
13326
  REQ_BGL = False
13327

    
13328
  def ExpandNames(self):
13329
    self.needed_locks = {}
13330

    
13331
  def CheckPrereq(self):
13332
    """Check prerequisites.
13333

13334
    This checks the pattern passed for validity by compiling it.
13335

13336
    """
13337
    try:
13338
      self.re = re.compile(self.op.pattern)
13339
    except re.error, err:
13340
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13341
                                 (self.op.pattern, err), errors.ECODE_INVAL)
13342

    
13343
  def Exec(self, feedback_fn):
13344
    """Returns the tag list.
13345

13346
    """
13347
    cfg = self.cfg
13348
    tgts = [("/cluster", cfg.GetClusterInfo())]
13349
    ilist = cfg.GetAllInstancesInfo().values()
13350
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13351
    nlist = cfg.GetAllNodesInfo().values()
13352
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13353
    tgts.extend(("/nodegroup/%s" % n.name, n)
13354
                for n in cfg.GetAllNodeGroupsInfo().values())
13355
    results = []
13356
    for path, target in tgts:
13357
      for tag in target.GetTags():
13358
        if self.re.search(tag):
13359
          results.append((path, tag))
13360
    return results
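
# --- Illustrative sketch (not part of the original module) ---
# Exec() above walks every taggable object and collects the (path, tag)
# pairs whose tag matches the compiled pattern.  The helper below shows the
# same idea on a plain path->tags mapping; paths and tags are invented.

import re as _example_re

def _ExampleSearchTags(pattern, path_to_tags):
  """Returns the (path, tag) pairs whose tag matches the pattern."""
  rx = _example_re.compile(pattern)
  results = []
  for (path, tags) in sorted(path_to_tags.items()):
    for tag in sorted(tags):
      if rx.search(tag):
        results.append((path, tag))
  return results

assert _ExampleSearchTags("^env:", {
  "/cluster": set(["env:prod"]),
  "/instances/inst1": set(["env:test", "owner:ops"]),
  }) == [("/cluster", "env:prod"), ("/instances/inst1", "env:test")]
# --- end of sketch ---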
13361

    
13362

    
13363
class LUTagsSet(TagsLU):
13364
  """Sets a tag on a given object.
13365

13366
  """
13367
  REQ_BGL = False
13368

    
13369
  def CheckPrereq(self):
13370
    """Check prerequisites.
13371

13372
    This checks the type and length of the tag name and value.
13373

13374
    """
13375
    TagsLU.CheckPrereq(self)
13376
    for tag in self.op.tags:
13377
      objects.TaggableObject.ValidateTag(tag)
13378

    
13379
  def Exec(self, feedback_fn):
13380
    """Sets the tag.
13381

13382
    """
13383
    try:
13384
      for tag in self.op.tags:
13385
        self.target.AddTag(tag)
13386
    except errors.TagError, err:
13387
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
13388
    self.cfg.Update(self.target, feedback_fn)
13389

    
13390

    
13391
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given tags are valid and currently set on the object.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Removes the tags from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


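# Worked example for LUTestDelay.Exec above: with repeat=3 the delay runs
# three times and the iterations are logged as "0/2", "1/2" and "2/2"
# (top_value is repeat - 1); with repeat=0 the delay still runs exactly once,
# just without the per-iteration log messages.

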
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


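# Sketch of the client side of the notification protocol implemented by
# LUTestJqueue._NotifyUsingSocket above (the actual test client lives outside
# this module; the snippet is illustrative only):
#
#   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   sock.connect(sockname)  # sockname is taken from the ELOG_JQUEUE_TEST entry
#   sock.send("x")          # sending any single byte confirms the notification
#   sock.close()

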
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has several sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the mode's key list in _MODE_DATA are
      required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.primary_hypervisor

    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
                                        [hypervisor_name])
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of node name to a dict of config-derived node attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = _MakeLegacyNodeInfo(nresult.payload)

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

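  # Worked example for the free-memory correction in _ComputeDynamicNodeData
  # above (numbers are illustrative): a primary instance with BE_MAXMEM of
  # 1024 MiB that currently uses only 512 MiB causes "memory_free" to be
  # reduced by the 512 MiB difference, so the allocator plans as if every
  # instance could grow back to its maximum memory.
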
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

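  # Example of a well-formed allocator reply as checked by _ValidateResult
  # above (values are illustrative):
  #
  #   {"success": true,
  #    "info": "allocation successful",
  #    "result": ["node2.example.com", "node4.example.com"]}
  #
  # Older scripts that return the node list under "nodes" instead of "result"
  # are still accepted via the backwards-compatibility branch above.
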
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


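# Example of the per-mode "request" dictionary built by
# IAllocator._AddNewInstance above for an allocation; all values are
# illustrative and the exact key set is defined by _MODE_DATA:
#
#   {"type": "allocate", "name": "web1.example.com",
#    "disk_template": "drbd", "required_nodes": 2,
#    "memory": 1024, "vcpus": 2, "os": "debian-image",
#    "disks": [{"size": 10240, "mode": "rw"}], "disk_space_total": 10368,
#    "nics": [{"mac": "aa:00:00:35:b1:02", "ip": None, "mode": "bridged",
#              "link": "xen-br0"}],
#    "tags": ["staging"], "hypervisor": "xen-pvm"}

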
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
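# Example use of the mapping above (illustrative):
# _GetQueryImplementation(constants.QR_NODE) returns the _NodeQuery class,
# while an unknown resource name raises OpPrereqError with ECODE_INVAL.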