Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 6e9814a1

History | View | Annotate | Download (504.9 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70
# States of instance
71
INSTANCE_UP = [constants.ADMINST_UP]
72
INSTANCE_DOWN = [constants.ADMINST_DOWN]
73
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
76

    
77

    
78
class ResultWithJobs:
79
  """Data container for LU results with jobs.
80

81
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83
  contained in the C{jobs} attribute and include the job IDs in the opcode
84
  result.
85

86
  """
87
  def __init__(self, jobs, **kwargs):
88
    """Initializes this class.
89

90
    Additional return values can be specified as keyword arguments.
91

92
    @type jobs: list of lists of L{opcode.OpCode}
93
    @param jobs: A list of lists of opcode objects
94

95
    """
96
    self.jobs = jobs
97
    self.other = kwargs
98

    
99

    
100
class LogicalUnit(object):
101
  """Logical Unit base class.
102

103
  Subclasses must follow these rules:
104
    - implement ExpandNames
105
    - implement CheckPrereq (except when tasklets are used)
106
    - implement Exec (except when tasklets are used)
107
    - implement BuildHooksEnv
108
    - implement BuildHooksNodes
109
    - redefine HPATH and HTYPE
110
    - optionally redefine their run requirements:
111
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112

113
  Note that all commands require root permissions.
114

115
  @ivar dry_run_result: the value (if any) that will be returned to the caller
116
      in dry-run mode (signalled by opcode dry_run parameter)
117

118
  """
119
  HPATH = None
120
  HTYPE = None
121
  REQ_BGL = True
122

    
123
  def __init__(self, processor, op, context, rpc_runner):
124
    """Constructor for LogicalUnit.
125

126
    This needs to be overridden in derived classes in order to check op
127
    validity.
128

129
    """
130
    self.proc = processor
131
    self.op = op
132
    self.cfg = context.cfg
133
    self.glm = context.glm
134
    # readability alias
135
    self.owned_locks = context.glm.list_owned
136
    self.context = context
137
    self.rpc = rpc_runner
138
    # Dicts used to declare locking needs to mcpu
139
    self.needed_locks = None
140
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
141
    self.add_locks = {}
142
    self.remove_locks = {}
143
    # Used to force good behavior when calling helper functions
144
    self.recalculate_locks = {}
145
    # logging
146
    self.Log = processor.Log # pylint: disable=C0103
147
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
148
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
149
    self.LogStep = processor.LogStep # pylint: disable=C0103
150
    # support for dry-run
151
    self.dry_run_result = None
152
    # support for generic debug attribute
153
    if (not hasattr(self.op, "debug_level") or
154
        not isinstance(self.op.debug_level, int)):
155
      self.op.debug_level = 0
156

    
157
    # Tasklets
158
    self.tasklets = None
159

    
160
    # Validate opcode parameters and set defaults
161
    self.op.Validate(True)
162

    
163
    self.CheckArguments()
164

    
165
  def CheckArguments(self):
166
    """Check syntactic validity for the opcode arguments.
167

168
    This method is for doing a simple syntactic check and ensure
169
    validity of opcode parameters, without any cluster-related
170
    checks. While the same can be accomplished in ExpandNames and/or
171
    CheckPrereq, doing these separate is better because:
172

173
      - ExpandNames is left as as purely a lock-related function
174
      - CheckPrereq is run after we have acquired locks (and possible
175
        waited for them)
176

177
    The function is allowed to change the self.op attribute so that
178
    later methods can no longer worry about missing parameters.
179

180
    """
181
    pass
182

    
183
  def ExpandNames(self):
184
    """Expand names for this LU.
185

186
    This method is called before starting to execute the opcode, and it should
187
    update all the parameters of the opcode to their canonical form (e.g. a
188
    short node name must be fully expanded after this method has successfully
189
    completed). This way locking, hooks, logging, etc. can work correctly.
190

191
    LUs which implement this method must also populate the self.needed_locks
192
    member, as a dict with lock levels as keys, and a list of needed lock names
193
    as values. Rules:
194

195
      - use an empty dict if you don't need any lock
196
      - if you don't need any lock at a particular level omit that level
197
      - don't put anything for the BGL level
198
      - if you want all locks at a level use locking.ALL_SET as a value
199

200
    If you need to share locks (rather than acquire them exclusively) at one
201
    level you can modify self.share_locks, setting a true value (usually 1) for
202
    that level. By default locks are not shared.
203

204
    This function can also define a list of tasklets, which then will be
205
    executed in order instead of the usual LU-level CheckPrereq and Exec
206
    functions, if those are not defined by the LU.
207

208
    Examples::
209

210
      # Acquire all nodes and one instance
211
      self.needed_locks = {
212
        locking.LEVEL_NODE: locking.ALL_SET,
213
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
214
      }
215
      # Acquire just two nodes
216
      self.needed_locks = {
217
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
218
      }
219
      # Acquire no locks
220
      self.needed_locks = {} # No, you can't leave it to the default value None
221

222
    """
223
    # The implementation of this method is mandatory only if the new LU is
224
    # concurrent, so that old LUs don't need to be changed all at the same
225
    # time.
226
    if self.REQ_BGL:
227
      self.needed_locks = {} # Exclusive LUs don't need locks.
228
    else:
229
      raise NotImplementedError
230

    
231
  def DeclareLocks(self, level):
232
    """Declare LU locking needs for a level
233

234
    While most LUs can just declare their locking needs at ExpandNames time,
235
    sometimes there's the need to calculate some locks after having acquired
236
    the ones before. This function is called just before acquiring locks at a
237
    particular level, but after acquiring the ones at lower levels, and permits
238
    such calculations. It can be used to modify self.needed_locks, and by
239
    default it does nothing.
240

241
    This function is only called if you have something already set in
242
    self.needed_locks for the level.
243

244
    @param level: Locking level which is going to be locked
245
    @type level: member of ganeti.locking.LEVELS
246

247
    """
248

    
249
  def CheckPrereq(self):
250
    """Check prerequisites for this LU.
251

252
    This method should check that the prerequisites for the execution
253
    of this LU are fulfilled. It can do internode communication, but
254
    it should be idempotent - no cluster or system changes are
255
    allowed.
256

257
    The method should raise errors.OpPrereqError in case something is
258
    not fulfilled. Its return value is ignored.
259

260
    This method should also update all the parameters of the opcode to
261
    their canonical form if it hasn't been done by ExpandNames before.
262

263
    """
264
    if self.tasklets is not None:
265
      for (idx, tl) in enumerate(self.tasklets):
266
        logging.debug("Checking prerequisites for tasklet %s/%s",
267
                      idx + 1, len(self.tasklets))
268
        tl.CheckPrereq()
269
    else:
270
      pass
271

    
272
  def Exec(self, feedback_fn):
273
    """Execute the LU.
274

275
    This method should implement the actual work. It should raise
276
    errors.OpExecError for failures that are somewhat dealt with in
277
    code, or expected.
278

279
    """
280
    if self.tasklets is not None:
281
      for (idx, tl) in enumerate(self.tasklets):
282
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
283
        tl.Exec(feedback_fn)
284
    else:
285
      raise NotImplementedError
286

    
287
  def BuildHooksEnv(self):
288
    """Build hooks environment for this LU.
289

290
    @rtype: dict
291
    @return: Dictionary containing the environment that will be used for
292
      running the hooks for this LU. The keys of the dict must not be prefixed
293
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294
      will extend the environment with additional variables. If no environment
295
      should be defined, an empty dictionary should be returned (not C{None}).
296
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
297
      will not be called.
298

299
    """
300
    raise NotImplementedError
301

    
302
  def BuildHooksNodes(self):
303
    """Build list of nodes to run LU's hooks.
304

305
    @rtype: tuple; (list, list)
306
    @return: Tuple containing a list of node names on which the hook
307
      should run before the execution and a list of node names on which the
308
      hook should run after the execution. No nodes should be returned as an
309
      empty list (and not None).
310
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311
      will not be called.
312

313
    """
314
    raise NotImplementedError
315

    
316
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317
    """Notify the LU about the results of its hooks.
318

319
    This method is called every time a hooks phase is executed, and notifies
320
    the Logical Unit about the hooks' result. The LU can then use it to alter
321
    its result based on the hooks.  By default the method does nothing and the
322
    previous result is passed back unchanged but any LU can define it if it
323
    wants to use the local cluster hook-scripts somehow.
324

325
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
326
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327
    @param hook_results: the results of the multi-node hooks rpc call
328
    @param feedback_fn: function used send feedback back to the caller
329
    @param lu_result: the previous Exec result this LU had, or None
330
        in the PRE phase
331
    @return: the new Exec result, based on the previous result
332
        and hook results
333

334
    """
335
    # API must be kept, thus we ignore the unused argument and could
336
    # be a function warnings
337
    # pylint: disable=W0613,R0201
338
    return lu_result
339

    
340
  def _ExpandAndLockInstance(self):
341
    """Helper function to expand and lock an instance.
342

343
    Many LUs that work on an instance take its name in self.op.instance_name
344
    and need to expand it and then declare the expanded name for locking. This
345
    function does it, and then updates self.op.instance_name to the expanded
346
    name. It also initializes needed_locks as a dict, if this hasn't been done
347
    before.
348

349
    """
350
    if self.needed_locks is None:
351
      self.needed_locks = {}
352
    else:
353
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354
        "_ExpandAndLockInstance called with instance-level locks set"
355
    self.op.instance_name = _ExpandInstanceName(self.cfg,
356
                                                self.op.instance_name)
357
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
358

    
359
  def _LockInstancesNodes(self, primary_only=False,
360
                          level=locking.LEVEL_NODE):
361
    """Helper function to declare instances' nodes for locking.
362

363
    This function should be called after locking one or more instances to lock
364
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365
    with all primary or secondary nodes for instances already locked and
366
    present in self.needed_locks[locking.LEVEL_INSTANCE].
367

368
    It should be called from DeclareLocks, and for safety only works if
369
    self.recalculate_locks[locking.LEVEL_NODE] is set.
370

371
    In the future it may grow parameters to just lock some instance's nodes, or
372
    to just lock primaries or secondary nodes, if needed.
373

374
    If should be called in DeclareLocks in a way similar to::
375

376
      if level == locking.LEVEL_NODE:
377
        self._LockInstancesNodes()
378

379
    @type primary_only: boolean
380
    @param primary_only: only lock primary nodes of locked instances
381
    @param level: Which lock level to use for locking nodes
382

383
    """
384
    assert level in self.recalculate_locks, \
385
      "_LockInstancesNodes helper function called with no nodes to recalculate"
386

    
387
    # TODO: check if we're really been called with the instance locks held
388

    
389
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390
    # future we might want to have different behaviors depending on the value
391
    # of self.recalculate_locks[locking.LEVEL_NODE]
392
    wanted_nodes = []
393
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395
      wanted_nodes.append(instance.primary_node)
396
      if not primary_only:
397
        wanted_nodes.extend(instance.secondary_nodes)
398

    
399
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400
      self.needed_locks[level] = wanted_nodes
401
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402
      self.needed_locks[level].extend(wanted_nodes)
403
    else:
404
      raise errors.ProgrammerError("Unknown recalculation mode")
405

    
406
    del self.recalculate_locks[level]
407

    
408

    
409
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410
  """Simple LU which runs no hooks.
411

412
  This LU is intended as a parent for other LogicalUnits which will
413
  run no hooks, in order to reduce duplicate code.
414

415
  """
416
  HPATH = None
417
  HTYPE = None
418

    
419
  def BuildHooksEnv(self):
420
    """Empty BuildHooksEnv for NoHooksLu.
421

422
    This just raises an error.
423

424
    """
425
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
426

    
427
  def BuildHooksNodes(self):
428
    """Empty BuildHooksNodes for NoHooksLU.
429

430
    """
431
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
432

    
433

    
434
class Tasklet:
435
  """Tasklet base class.
436

437
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
439
  tasklets know nothing about locks.
440

441
  Subclasses must follow these rules:
442
    - Implement CheckPrereq
443
    - Implement Exec
444

445
  """
446
  def __init__(self, lu):
447
    self.lu = lu
448

    
449
    # Shortcuts
450
    self.cfg = lu.cfg
451
    self.rpc = lu.rpc
452

    
453
  def CheckPrereq(self):
454
    """Check prerequisites for this tasklets.
455

456
    This method should check whether the prerequisites for the execution of
457
    this tasklet are fulfilled. It can do internode communication, but it
458
    should be idempotent - no cluster or system changes are allowed.
459

460
    The method should raise errors.OpPrereqError in case something is not
461
    fulfilled. Its return value is ignored.
462

463
    This method should also update all parameters to their canonical form if it
464
    hasn't been done before.
465

466
    """
467
    pass
468

    
469
  def Exec(self, feedback_fn):
470
    """Execute the tasklet.
471

472
    This method should implement the actual work. It should raise
473
    errors.OpExecError for failures that are somewhat dealt with in code, or
474
    expected.
475

476
    """
477
    raise NotImplementedError
478

    
479

    
480
class _QueryBase:
481
  """Base for query utility classes.
482

483
  """
484
  #: Attribute holding field definitions
485
  FIELDS = None
486

    
487
  def __init__(self, qfilter, fields, use_locking):
488
    """Initializes this class.
489

490
    """
491
    self.use_locking = use_locking
492

    
493
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
494
                             namefield="name")
495
    self.requested_data = self.query.RequestedData()
496
    self.names = self.query.RequestedNames()
497

    
498
    # Sort only if no names were requested
499
    self.sort_by_name = not self.names
500

    
501
    self.do_locking = None
502
    self.wanted = None
503

    
504
  def _GetNames(self, lu, all_names, lock_level):
505
    """Helper function to determine names asked for in the query.
506

507
    """
508
    if self.do_locking:
509
      names = lu.owned_locks(lock_level)
510
    else:
511
      names = all_names
512

    
513
    if self.wanted == locking.ALL_SET:
514
      assert not self.names
515
      # caller didn't specify names, so ordering is not important
516
      return utils.NiceSort(names)
517

    
518
    # caller specified names and we must keep the same order
519
    assert self.names
520
    assert not self.do_locking or lu.glm.is_owned(lock_level)
521

    
522
    missing = set(self.wanted).difference(names)
523
    if missing:
524
      raise errors.OpExecError("Some items were removed before retrieving"
525
                               " their data: %s" % missing)
526

    
527
    # Return expanded names
528
    return self.wanted
529

    
530
  def ExpandNames(self, lu):
531
    """Expand names for this query.
532

533
    See L{LogicalUnit.ExpandNames}.
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def DeclareLocks(self, lu, level):
539
    """Declare locks for this query.
540

541
    See L{LogicalUnit.DeclareLocks}.
542

543
    """
544
    raise NotImplementedError()
545

    
546
  def _GetQueryData(self, lu):
547
    """Collects all data for this query.
548

549
    @return: Query data object
550

551
    """
552
    raise NotImplementedError()
553

    
554
  def NewStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559
                                  sort_by_name=self.sort_by_name)
560

    
561
  def OldStyleQuery(self, lu):
562
    """Collect data and execute query.
563

564
    """
565
    return self.query.OldStyleQuery(self._GetQueryData(lu),
566
                                    sort_by_name=self.sort_by_name)
567

    
568

    
569
def _ShareAll():
570
  """Returns a dict declaring all lock levels shared.
571

572
  """
573
  return dict.fromkeys(locking.LEVELS, 1)
574

    
575

    
576
def _MakeLegacyNodeInfo(data):
577
  """Formats the data returned by L{rpc.RpcRunner.call_node_info}.
578

579
  Converts the data into a single dictionary. This is fine for most use cases,
580
  but some require information from more than one volume group or hypervisor.
581

582
  """
583
  (bootid, (vg_info, ), (hv_info, )) = data
584

    
585
  return utils.JoinDisjointDicts(utils.JoinDisjointDicts(vg_info, hv_info), {
586
    "bootid": bootid,
587
    })
588

    
589

    
590
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
591
  """Checks if the owned node groups are still correct for an instance.
592

593
  @type cfg: L{config.ConfigWriter}
594
  @param cfg: The cluster configuration
595
  @type instance_name: string
596
  @param instance_name: Instance name
597
  @type owned_groups: set or frozenset
598
  @param owned_groups: List of currently owned node groups
599

600
  """
601
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
602

    
603
  if not owned_groups.issuperset(inst_groups):
604
    raise errors.OpPrereqError("Instance %s's node groups changed since"
605
                               " locks were acquired, current groups are"
606
                               " are '%s', owning groups '%s'; retry the"
607
                               " operation" %
608
                               (instance_name,
609
                                utils.CommaJoin(inst_groups),
610
                                utils.CommaJoin(owned_groups)),
611
                               errors.ECODE_STATE)
612

    
613
  return inst_groups
614

    
615

    
616
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
617
  """Checks if the instances in a node group are still correct.
618

619
  @type cfg: L{config.ConfigWriter}
620
  @param cfg: The cluster configuration
621
  @type group_uuid: string
622
  @param group_uuid: Node group UUID
623
  @type owned_instances: set or frozenset
624
  @param owned_instances: List of currently owned instances
625

626
  """
627
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
628
  if owned_instances != wanted_instances:
629
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
630
                               " locks were acquired, wanted '%s', have '%s';"
631
                               " retry the operation" %
632
                               (group_uuid,
633
                                utils.CommaJoin(wanted_instances),
634
                                utils.CommaJoin(owned_instances)),
635
                               errors.ECODE_STATE)
636

    
637
  return wanted_instances
638

    
639

    
640
def _SupportsOob(cfg, node):
641
  """Tells if node supports OOB.
642

643
  @type cfg: L{config.ConfigWriter}
644
  @param cfg: The cluster configuration
645
  @type node: L{objects.Node}
646
  @param node: The node
647
  @return: The OOB script if supported or an empty string otherwise
648

649
  """
650
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
651

    
652

    
653
def _GetWantedNodes(lu, nodes):
654
  """Returns list of checked and expanded node names.
655

656
  @type lu: L{LogicalUnit}
657
  @param lu: the logical unit on whose behalf we execute
658
  @type nodes: list
659
  @param nodes: list of node names or None for all nodes
660
  @rtype: list
661
  @return: the list of nodes, sorted
662
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
663

664
  """
665
  if nodes:
666
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
667

    
668
  return utils.NiceSort(lu.cfg.GetNodeList())
669

    
670

    
671
def _GetWantedInstances(lu, instances):
672
  """Returns list of checked and expanded instance names.
673

674
  @type lu: L{LogicalUnit}
675
  @param lu: the logical unit on whose behalf we execute
676
  @type instances: list
677
  @param instances: list of instance names or None for all instances
678
  @rtype: list
679
  @return: the list of instances, sorted
680
  @raise errors.OpPrereqError: if the instances parameter is wrong type
681
  @raise errors.OpPrereqError: if any of the passed instances is not found
682

683
  """
684
  if instances:
685
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
686
  else:
687
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
688
  return wanted
689

    
690

    
691
def _GetUpdatedParams(old_params, update_dict,
692
                      use_default=True, use_none=False):
693
  """Return the new version of a parameter dictionary.
694

695
  @type old_params: dict
696
  @param old_params: old parameters
697
  @type update_dict: dict
698
  @param update_dict: dict containing new parameter values, or
699
      constants.VALUE_DEFAULT to reset the parameter to its default
700
      value
701
  @param use_default: boolean
702
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
703
      values as 'to be deleted' values
704
  @param use_none: boolean
705
  @type use_none: whether to recognise C{None} values as 'to be
706
      deleted' values
707
  @rtype: dict
708
  @return: the new parameter dictionary
709

710
  """
711
  params_copy = copy.deepcopy(old_params)
712
  for key, val in update_dict.iteritems():
713
    if ((use_default and val == constants.VALUE_DEFAULT) or
714
        (use_none and val is None)):
715
      try:
716
        del params_copy[key]
717
      except KeyError:
718
        pass
719
    else:
720
      params_copy[key] = val
721
  return params_copy
722

    
723

    
724
def _ReleaseLocks(lu, level, names=None, keep=None):
725
  """Releases locks owned by an LU.
726

727
  @type lu: L{LogicalUnit}
728
  @param level: Lock level
729
  @type names: list or None
730
  @param names: Names of locks to release
731
  @type keep: list or None
732
  @param keep: Names of locks to retain
733

734
  """
735
  assert not (keep is not None and names is not None), \
736
         "Only one of the 'names' and the 'keep' parameters can be given"
737

    
738
  if names is not None:
739
    should_release = names.__contains__
740
  elif keep:
741
    should_release = lambda name: name not in keep
742
  else:
743
    should_release = None
744

    
745
  owned = lu.owned_locks(level)
746
  if not owned:
747
    # Not owning any lock at this level, do nothing
748
    pass
749

    
750
  elif should_release:
751
    retain = []
752
    release = []
753

    
754
    # Determine which locks to release
755
    for name in owned:
756
      if should_release(name):
757
        release.append(name)
758
      else:
759
        retain.append(name)
760

    
761
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
762

    
763
    # Release just some locks
764
    lu.glm.release(level, names=release)
765

    
766
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
767
  else:
768
    # Release everything
769
    lu.glm.release(level)
770

    
771
    assert not lu.glm.is_owned(level), "No locks should be owned"
772

    
773

    
774
def _MapInstanceDisksToNodes(instances):
775
  """Creates a map from (node, volume) to instance name.
776

777
  @type instances: list of L{objects.Instance}
778
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
779

780
  """
781
  return dict(((node, vol), inst.name)
782
              for inst in instances
783
              for (node, vols) in inst.MapLVsByNode().items()
784
              for vol in vols)
785

    
786

    
787
def _RunPostHook(lu, node_name):
788
  """Runs the post-hook for an opcode on a single node.
789

790
  """
791
  hm = lu.proc.BuildHooksManager(lu)
792
  try:
793
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
794
  except:
795
    # pylint: disable=W0702
796
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
797

    
798

    
799
def _CheckOutputFields(static, dynamic, selected):
800
  """Checks whether all selected fields are valid.
801

802
  @type static: L{utils.FieldSet}
803
  @param static: static fields set
804
  @type dynamic: L{utils.FieldSet}
805
  @param dynamic: dynamic fields set
806

807
  """
808
  f = utils.FieldSet()
809
  f.Extend(static)
810
  f.Extend(dynamic)
811

    
812
  delta = f.NonMatching(selected)
813
  if delta:
814
    raise errors.OpPrereqError("Unknown output fields selected: %s"
815
                               % ",".join(delta), errors.ECODE_INVAL)
816

    
817

    
818
def _CheckGlobalHvParams(params):
819
  """Validates that given hypervisor params are not global ones.
820

821
  This will ensure that instances don't get customised versions of
822
  global params.
823

824
  """
825
  used_globals = constants.HVC_GLOBALS.intersection(params)
826
  if used_globals:
827
    msg = ("The following hypervisor parameters are global and cannot"
828
           " be customized at instance level, please modify them at"
829
           " cluster level: %s" % utils.CommaJoin(used_globals))
830
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
831

    
832

    
833
def _CheckNodeOnline(lu, node, msg=None):
834
  """Ensure that a given node is online.
835

836
  @param lu: the LU on behalf of which we make the check
837
  @param node: the node to check
838
  @param msg: if passed, should be a message to replace the default one
839
  @raise errors.OpPrereqError: if the node is offline
840

841
  """
842
  if msg is None:
843
    msg = "Can't use offline node"
844
  if lu.cfg.GetNodeInfo(node).offline:
845
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
846

    
847

    
848
def _CheckNodeNotDrained(lu, node):
849
  """Ensure that a given node is not drained.
850

851
  @param lu: the LU on behalf of which we make the check
852
  @param node: the node to check
853
  @raise errors.OpPrereqError: if the node is drained
854

855
  """
856
  if lu.cfg.GetNodeInfo(node).drained:
857
    raise errors.OpPrereqError("Can't use drained node %s" % node,
858
                               errors.ECODE_STATE)
859

    
860

    
861
def _CheckNodeVmCapable(lu, node):
862
  """Ensure that a given node is vm capable.
863

864
  @param lu: the LU on behalf of which we make the check
865
  @param node: the node to check
866
  @raise errors.OpPrereqError: if the node is not vm capable
867

868
  """
869
  if not lu.cfg.GetNodeInfo(node).vm_capable:
870
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
871
                               errors.ECODE_STATE)
872

    
873

    
874
def _CheckNodeHasOS(lu, node, os_name, force_variant):
875
  """Ensure that a node supports a given OS.
876

877
  @param lu: the LU on behalf of which we make the check
878
  @param node: the node to check
879
  @param os_name: the OS to query about
880
  @param force_variant: whether to ignore variant errors
881
  @raise errors.OpPrereqError: if the node is not supporting the OS
882

883
  """
884
  result = lu.rpc.call_os_get(node, os_name)
885
  result.Raise("OS '%s' not in supported OS list for node %s" %
886
               (os_name, node),
887
               prereq=True, ecode=errors.ECODE_INVAL)
888
  if not force_variant:
889
    _CheckOSVariant(result.payload, os_name)
890

    
891

    
892
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
893
  """Ensure that a node has the given secondary ip.
894

895
  @type lu: L{LogicalUnit}
896
  @param lu: the LU on behalf of which we make the check
897
  @type node: string
898
  @param node: the node to check
899
  @type secondary_ip: string
900
  @param secondary_ip: the ip to check
901
  @type prereq: boolean
902
  @param prereq: whether to throw a prerequisite or an execute error
903
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
904
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
905

906
  """
907
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
908
  result.Raise("Failure checking secondary ip on node %s" % node,
909
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
910
  if not result.payload:
911
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
912
           " please fix and re-run this command" % secondary_ip)
913
    if prereq:
914
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
915
    else:
916
      raise errors.OpExecError(msg)
917

    
918

    
919
def _GetClusterDomainSecret():
920
  """Reads the cluster domain secret.
921

922
  """
923
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
924
                               strict=True)
925

    
926

    
927
def _CheckInstanceState(lu, instance, req_states, msg=None):
928
  """Ensure that an instance is in one of the required states.
929

930
  @param lu: the LU on behalf of which we make the check
931
  @param instance: the instance to check
932
  @param msg: if passed, should be a message to replace the default one
933
  @raise errors.OpPrereqError: if the instance is not in the required state
934

935
  """
936
  if msg is None:
937
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
938
  if instance.admin_state not in req_states:
939
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
940
                               (instance, instance.admin_state, msg),
941
                               errors.ECODE_STATE)
942

    
943
  if constants.ADMINST_UP not in req_states:
944
    pnode = instance.primary_node
945
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
946
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
947
                prereq=True, ecode=errors.ECODE_ENVIRON)
948

    
949
    if instance.name in ins_l.payload:
950
      raise errors.OpPrereqError("Instance %s is running, %s" %
951
                                 (instance.name, msg), errors.ECODE_STATE)
952

    
953

    
954
def _ExpandItemName(fn, name, kind):
955
  """Expand an item name.
956

957
  @param fn: the function to use for expansion
958
  @param name: requested item name
959
  @param kind: text description ('Node' or 'Instance')
960
  @return: the resolved (full) name
961
  @raise errors.OpPrereqError: if the item is not found
962

963
  """
964
  full_name = fn(name)
965
  if full_name is None:
966
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
967
                               errors.ECODE_NOENT)
968
  return full_name
969

    
970

    
971
def _ExpandNodeName(cfg, name):
972
  """Wrapper over L{_ExpandItemName} for nodes."""
973
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
974

    
975

    
976
def _ExpandInstanceName(cfg, name):
977
  """Wrapper over L{_ExpandItemName} for instance."""
978
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
979

    
980

    
981
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
982
                          minmem, maxmem, vcpus, nics, disk_template, disks,
983
                          bep, hvp, hypervisor_name, tags):
984
  """Builds instance related env variables for hooks
985

986
  This builds the hook environment from individual variables.
987

988
  @type name: string
989
  @param name: the name of the instance
990
  @type primary_node: string
991
  @param primary_node: the name of the instance's primary node
992
  @type secondary_nodes: list
993
  @param secondary_nodes: list of secondary nodes as strings
994
  @type os_type: string
995
  @param os_type: the name of the instance's OS
996
  @type status: string
997
  @param status: the desired status of the instance
998
  @type minmem: string
999
  @param minmem: the minimum memory size of the instance
1000
  @type maxmem: string
1001
  @param maxmem: the maximum memory size of the instance
1002
  @type vcpus: string
1003
  @param vcpus: the count of VCPUs the instance has
1004
  @type nics: list
1005
  @param nics: list of tuples (ip, mac, mode, link) representing
1006
      the NICs the instance has
1007
  @type disk_template: string
1008
  @param disk_template: the disk template of the instance
1009
  @type disks: list
1010
  @param disks: the list of (size, mode) pairs
1011
  @type bep: dict
1012
  @param bep: the backend parameters for the instance
1013
  @type hvp: dict
1014
  @param hvp: the hypervisor parameters for the instance
1015
  @type hypervisor_name: string
1016
  @param hypervisor_name: the hypervisor for the instance
1017
  @type tags: list
1018
  @param tags: list of instance tags as strings
1019
  @rtype: dict
1020
  @return: the hook environment for this instance
1021

1022
  """
1023
  env = {
1024
    "OP_TARGET": name,
1025
    "INSTANCE_NAME": name,
1026
    "INSTANCE_PRIMARY": primary_node,
1027
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1028
    "INSTANCE_OS_TYPE": os_type,
1029
    "INSTANCE_STATUS": status,
1030
    "INSTANCE_MINMEM": minmem,
1031
    "INSTANCE_MAXMEM": maxmem,
1032
    # TODO(2.7) remove deprecated "memory" value
1033
    "INSTANCE_MEMORY": maxmem,
1034
    "INSTANCE_VCPUS": vcpus,
1035
    "INSTANCE_DISK_TEMPLATE": disk_template,
1036
    "INSTANCE_HYPERVISOR": hypervisor_name,
1037
  }
1038
  if nics:
1039
    nic_count = len(nics)
1040
    for idx, (ip, mac, mode, link) in enumerate(nics):
1041
      if ip is None:
1042
        ip = ""
1043
      env["INSTANCE_NIC%d_IP" % idx] = ip
1044
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1045
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1046
      env["INSTANCE_NIC%d_LINK" % idx] = link
1047
      if mode == constants.NIC_MODE_BRIDGED:
1048
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1049
  else:
1050
    nic_count = 0
1051

    
1052
  env["INSTANCE_NIC_COUNT"] = nic_count
1053

    
1054
  if disks:
1055
    disk_count = len(disks)
1056
    for idx, (size, mode) in enumerate(disks):
1057
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1058
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1059
  else:
1060
    disk_count = 0
1061

    
1062
  env["INSTANCE_DISK_COUNT"] = disk_count
1063

    
1064
  if not tags:
1065
    tags = []
1066

    
1067
  env["INSTANCE_TAGS"] = " ".join(tags)
1068

    
1069
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1070
    for key, value in source.items():
1071
      env["INSTANCE_%s_%s" % (kind, key)] = value
1072

    
1073
  return env
1074

    
1075

    
1076
def _NICListToTuple(lu, nics):
1077
  """Build a list of nic information tuples.
1078

1079
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1080
  value in LUInstanceQueryData.
1081

1082
  @type lu:  L{LogicalUnit}
1083
  @param lu: the logical unit on whose behalf we execute
1084
  @type nics: list of L{objects.NIC}
1085
  @param nics: list of nics to convert to hooks tuples
1086

1087
  """
1088
  hooks_nics = []
1089
  cluster = lu.cfg.GetClusterInfo()
1090
  for nic in nics:
1091
    ip = nic.ip
1092
    mac = nic.mac
1093
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1094
    mode = filled_params[constants.NIC_MODE]
1095
    link = filled_params[constants.NIC_LINK]
1096
    hooks_nics.append((ip, mac, mode, link))
1097
  return hooks_nics
1098

    
1099

    
1100
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1101
  """Builds instance related env variables for hooks from an object.
1102

1103
  @type lu: L{LogicalUnit}
1104
  @param lu: the logical unit on whose behalf we execute
1105
  @type instance: L{objects.Instance}
1106
  @param instance: the instance for which we should build the
1107
      environment
1108
  @type override: dict
1109
  @param override: dictionary with key/values that will override
1110
      our values
1111
  @rtype: dict
1112
  @return: the hook environment dictionary
1113

1114
  """
1115
  cluster = lu.cfg.GetClusterInfo()
1116
  bep = cluster.FillBE(instance)
1117
  hvp = cluster.FillHV(instance)
1118
  args = {
1119
    "name": instance.name,
1120
    "primary_node": instance.primary_node,
1121
    "secondary_nodes": instance.secondary_nodes,
1122
    "os_type": instance.os,
1123
    "status": instance.admin_state,
1124
    "maxmem": bep[constants.BE_MAXMEM],
1125
    "minmem": bep[constants.BE_MINMEM],
1126
    "vcpus": bep[constants.BE_VCPUS],
1127
    "nics": _NICListToTuple(lu, instance.nics),
1128
    "disk_template": instance.disk_template,
1129
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1130
    "bep": bep,
1131
    "hvp": hvp,
1132
    "hypervisor_name": instance.hypervisor,
1133
    "tags": instance.tags,
1134
  }
1135
  if override:
1136
    args.update(override)
1137
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1138

    
1139

    
1140
def _AdjustCandidatePool(lu, exceptions):
1141
  """Adjust the candidate pool after node operations.
1142

1143
  """
1144
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1145
  if mod_list:
1146
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1147
               utils.CommaJoin(node.name for node in mod_list))
1148
    for name in mod_list:
1149
      lu.context.ReaddNode(name)
1150
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1151
  if mc_now > mc_max:
1152
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1153
               (mc_now, mc_max))
1154

    
1155

    
1156
def _DecideSelfPromotion(lu, exceptions=None):
1157
  """Decide whether I should promote myself as a master candidate.
1158

1159
  """
1160
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1161
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1162
  # the new node will increase mc_max with one, so:
1163
  mc_should = min(mc_should + 1, cp_size)
1164
  return mc_now < mc_should
1165

    
1166

    
1167
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1168
  """Check that the brigdes needed by a list of nics exist.
1169

1170
  """
1171
  cluster = lu.cfg.GetClusterInfo()
1172
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1173
  brlist = [params[constants.NIC_LINK] for params in paramslist
1174
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1175
  if brlist:
1176
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1177
    result.Raise("Error checking bridges on destination node '%s'" %
1178
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1179

    
1180

    
1181
def _CheckInstanceBridgesExist(lu, instance, node=None):
1182
  """Check that the brigdes needed by an instance exist.
1183

1184
  """
1185
  if node is None:
1186
    node = instance.primary_node
1187
  _CheckNicsBridgesExist(lu, instance.nics, node)
1188

    
1189

    
1190
def _CheckOSVariant(os_obj, name):
1191
  """Check whether an OS name conforms to the os variants specification.
1192

1193
  @type os_obj: L{objects.OS}
1194
  @param os_obj: OS object to check
1195
  @type name: string
1196
  @param name: OS name passed by the user, to check for validity
1197

1198
  """
1199
  variant = objects.OS.GetVariant(name)
1200
  if not os_obj.supported_variants:
1201
    if variant:
1202
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1203
                                 " passed)" % (os_obj.name, variant),
1204
                                 errors.ECODE_INVAL)
1205
    return
1206
  if not variant:
1207
    raise errors.OpPrereqError("OS name must include a variant",
1208
                               errors.ECODE_INVAL)
1209

    
1210
  if variant not in os_obj.supported_variants:
1211
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1212

    
1213

    
1214
def _GetNodeInstancesInner(cfg, fn):
1215
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1216

    
1217

    
1218
def _GetNodeInstances(cfg, node_name):
1219
  """Returns a list of all primary and secondary instances on a node.
1220

1221
  """
1222

    
1223
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1224

    
1225

    
1226
def _GetNodePrimaryInstances(cfg, node_name):
1227
  """Returns primary instances on a node.
1228

1229
  """
1230
  return _GetNodeInstancesInner(cfg,
1231
                                lambda inst: node_name == inst.primary_node)
1232

    
1233

    
1234
def _GetNodeSecondaryInstances(cfg, node_name):
1235
  """Returns secondary instances on a node.
1236

1237
  """
1238
  return _GetNodeInstancesInner(cfg,
1239
                                lambda inst: node_name in inst.secondary_nodes)
1240

    
1241

    
1242
def _GetStorageTypeArgs(cfg, storage_type):
1243
  """Returns the arguments for a storage type.
1244

1245
  """
1246
  # Special case for file storage
1247
  if storage_type == constants.ST_FILE:
1248
    # storage.FileStorage wants a list of storage directories
1249
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1250

    
1251
  return []
1252

    
1253

    
1254
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1255
  faulty = []
1256

    
1257
  for dev in instance.disks:
1258
    cfg.SetDiskID(dev, node_name)
1259

    
1260
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1261
  result.Raise("Failed to get disk status from node %s" % node_name,
1262
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1263

    
1264
  for idx, bdev_status in enumerate(result.payload):
1265
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1266
      faulty.append(idx)
1267

    
1268
  return faulty
1269

    
1270

    
1271
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1272
  """Check the sanity of iallocator and node arguments and use the
1273
  cluster-wide iallocator if appropriate.
1274

1275
  Check that at most one of (iallocator, node) is specified. If none is
1276
  specified, then the LU's opcode's iallocator slot is filled with the
1277
  cluster-wide default iallocator.
1278

1279
  @type iallocator_slot: string
1280
  @param iallocator_slot: the name of the opcode iallocator slot
1281
  @type node_slot: string
1282
  @param node_slot: the name of the opcode target node slot
1283

1284
  """
1285
  node = getattr(lu.op, node_slot, None)
1286
  iallocator = getattr(lu.op, iallocator_slot, None)
1287

    
1288
  if node is not None and iallocator is not None:
1289
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1290
                               errors.ECODE_INVAL)
1291
  elif node is None and iallocator is None:
1292
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1293
    if default_iallocator:
1294
      setattr(lu.op, iallocator_slot, default_iallocator)
1295
    else:
1296
      raise errors.OpPrereqError("No iallocator or node given and no"
1297
                                 " cluster-wide default iallocator found;"
1298
                                 " please specify either an iallocator or a"
1299
                                 " node, or set a cluster-wide default"
1300
                                 " iallocator")
1301

    
1302

    
1303
def _GetDefaultIAllocator(cfg, iallocator):
1304
  """Decides on which iallocator to use.
1305

1306
  @type cfg: L{config.ConfigWriter}
1307
  @param cfg: Cluster configuration object
1308
  @type iallocator: string or None
1309
  @param iallocator: Iallocator specified in opcode
1310
  @rtype: string
1311
  @return: Iallocator name
1312

1313
  """
1314
  if not iallocator:
1315
    # Use default iallocator
1316
    iallocator = cfg.GetDefaultIAllocator()
1317

    
1318
  if not iallocator:
1319
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1320
                               " opcode nor as a cluster-wide default",
1321
                               errors.ECODE_INVAL)
1322

    
1323
  return iallocator
1324

    
1325

    
1326
class LUClusterPostInit(LogicalUnit):
1327
  """Logical unit for running hooks after cluster initialization.
1328

1329
  """
1330
  HPATH = "cluster-init"
1331
  HTYPE = constants.HTYPE_CLUSTER
1332

    
1333
  def BuildHooksEnv(self):
1334
    """Build hooks env.
1335

1336
    """
1337
    return {
1338
      "OP_TARGET": self.cfg.GetClusterName(),
1339
      }
1340

    
1341
  def BuildHooksNodes(self):
1342
    """Build hooks nodes.
1343

1344
    """
1345
    return ([], [self.cfg.GetMasterNode()])
1346

    
1347
  def Exec(self, feedback_fn):
1348
    """Nothing to do.
1349

1350
    """
1351
    return True
1352

    
1353

    
1354
class LUClusterDestroy(LogicalUnit):
1355
  """Logical unit for destroying the cluster.
1356

1357
  """
1358
  HPATH = "cluster-destroy"
1359
  HTYPE = constants.HTYPE_CLUSTER
1360

    
1361
  def BuildHooksEnv(self):
1362
    """Build hooks env.
1363

1364
    """
1365
    return {
1366
      "OP_TARGET": self.cfg.GetClusterName(),
1367
      }
1368

    
1369
  def BuildHooksNodes(self):
1370
    """Build hooks nodes.
1371

1372
    """
1373
    return ([], [])
1374

    
1375
  def CheckPrereq(self):
1376
    """Check prerequisites.
1377

1378
    This checks whether the cluster is empty.
1379

1380
    Any errors are signaled by raising errors.OpPrereqError.
1381

1382
    """
1383
    master = self.cfg.GetMasterNode()
1384

    
1385
    nodelist = self.cfg.GetNodeList()
1386
    if len(nodelist) != 1 or nodelist[0] != master:
1387
      raise errors.OpPrereqError("There are still %d node(s) in"
1388
                                 " this cluster." % (len(nodelist) - 1),
1389
                                 errors.ECODE_INVAL)
1390
    instancelist = self.cfg.GetInstanceList()
1391
    if instancelist:
1392
      raise errors.OpPrereqError("There are still %d instance(s) in"
1393
                                 " this cluster." % len(instancelist),
1394
                                 errors.ECODE_INVAL)
1395

    
1396
  def Exec(self, feedback_fn):
1397
    """Destroys the cluster.
1398

1399
    """
1400
    master_params = self.cfg.GetMasterNetworkParameters()
1401

    
1402
    # Run post hooks on master node before it's removed
1403
    _RunPostHook(self, master_params.name)
1404

    
1405
    ems = self.cfg.GetUseExternalMipScript()
1406
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1407
                                                     master_params, ems)
1408
    result.Raise("Could not disable the master role")
1409

    
1410
    return master_params.name
1411

    
1412

    
1413
def _VerifyCertificate(filename):
1414
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1415

1416
  @type filename: string
1417
  @param filename: Path to PEM file
1418

1419
  """
1420
  try:
1421
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1422
                                           utils.ReadFile(filename))
1423
  except Exception, err: # pylint: disable=W0703
1424
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1425
            "Failed to load X509 certificate %s: %s" % (filename, err))
1426

    
1427
  (errcode, msg) = \
1428
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1429
                                constants.SSL_CERT_EXPIRATION_ERROR)
1430

    
1431
  if msg:
1432
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1433
  else:
1434
    fnamemsg = None
1435

    
1436
  if errcode is None:
1437
    return (None, fnamemsg)
1438
  elif errcode == utils.CERT_WARNING:
1439
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1440
  elif errcode == utils.CERT_ERROR:
1441
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1442

    
1443
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1444

    
1445

    
1446
def _GetAllHypervisorParameters(cluster, instances):
1447
  """Compute the set of all hypervisor parameters.
1448

1449
  @type cluster: L{objects.Cluster}
1450
  @param cluster: the cluster object
1451
  @param instances: list of L{objects.Instance}
1452
  @param instances: additional instances from which to obtain parameters
1453
  @rtype: list of (origin, hypervisor, parameters)
1454
  @return: a list with all parameters found, indicating the hypervisor they
1455
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1456

1457
  """
1458
  hvp_data = []
1459

    
1460
  for hv_name in cluster.enabled_hypervisors:
1461
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1462

    
1463
  for os_name, os_hvp in cluster.os_hvp.items():
1464
    for hv_name, hv_params in os_hvp.items():
1465
      if hv_params:
1466
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1467
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1468

    
1469
  # TODO: collapse identical parameter values in a single one
1470
  for instance in instances:
1471
    if instance.hvparams:
1472
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1473
                       cluster.FillHV(instance)))
1474

    
1475
  return hvp_data
1476

    
1477

    
1478
class _VerifyErrors(object):
1479
  """Mix-in for cluster/group verify LUs.
1480

1481
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1482
  self.op and self._feedback_fn to be available.)
1483

1484
  """
1485

    
1486
  ETYPE_FIELD = "code"
1487
  ETYPE_ERROR = "ERROR"
1488
  ETYPE_WARNING = "WARNING"
1489

    
1490
  def _Error(self, ecode, item, msg, *args, **kwargs):
1491
    """Format an error message.
1492

1493
    Based on the opcode's error_codes parameter, either format a
1494
    parseable error code, or a simpler error string.
1495

1496
    This must be called only from Exec and functions called from Exec.
1497

1498
    """
1499
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1500
    itype, etxt, _ = ecode
1501
    # first complete the msg
1502
    if args:
1503
      msg = msg % args
1504
    # then format the whole message
1505
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1506
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1507
    else:
1508
      if item:
1509
        item = " " + item
1510
      else:
1511
        item = ""
1512
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1513
    # and finally report it via the feedback_fn
1514
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1515

    
1516
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1517
    """Log an error message if the passed condition is True.
1518

1519
    """
1520
    cond = (bool(cond)
1521
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1522

    
1523
    # If the error code is in the list of ignored errors, demote the error to a
1524
    # warning
1525
    (_, etxt, _) = ecode
1526
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1527
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1528

    
1529
    if cond:
1530
      self._Error(ecode, *args, **kwargs)
1531

    
1532
    # do not mark the operation as failed for WARN cases only
1533
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1534
      self.bad = self.bad or cond
1535

    
1536

    
1537
class LUClusterVerify(NoHooksLU):
1538
  """Submits all jobs necessary to verify the cluster.
1539

1540
  """
1541
  REQ_BGL = False
1542

    
1543
  def ExpandNames(self):
1544
    self.needed_locks = {}
1545

    
1546
  def Exec(self, feedback_fn):
1547
    jobs = []
1548

    
1549
    if self.op.group_name:
1550
      groups = [self.op.group_name]
1551
      depends_fn = lambda: None
1552
    else:
1553
      groups = self.cfg.GetNodeGroupList()
1554

    
1555
      # Verify global configuration
1556
      jobs.append([
1557
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1558
        ])
1559

    
1560
      # Always depend on global verification
1561
      depends_fn = lambda: [(-len(jobs), [])]
1562

    
1563
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
1564
                                            ignore_errors=self.op.ignore_errors,
1565
                                            depends=depends_fn())]
1566
                for group in groups)
1567

    
1568
    # Fix up all parameters
1569
    for op in itertools.chain(*jobs): # pylint: disable=W0142
1570
      op.debug_simulate_errors = self.op.debug_simulate_errors
1571
      op.verbose = self.op.verbose
1572
      op.error_codes = self.op.error_codes
1573
      try:
1574
        op.skip_checks = self.op.skip_checks
1575
      except AttributeError:
1576
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)
1577

    
1578
    return ResultWithJobs(jobs)
1579

    
1580

    
1581
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1582
  """Verifies the cluster config.
1583

1584
  """
1585
  REQ_BGL = True
1586

    
1587
  def _VerifyHVP(self, hvp_data):
1588
    """Verifies locally the syntax of the hypervisor parameters.
1589

1590
    """
1591
    for item, hv_name, hv_params in hvp_data:
1592
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1593
             (item, hv_name))
1594
      try:
1595
        hv_class = hypervisor.GetHypervisor(hv_name)
1596
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1597
        hv_class.CheckParameterSyntax(hv_params)
1598
      except errors.GenericError, err:
1599
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))
1600

    
1601
  def ExpandNames(self):
1602
    # Information can be safely retrieved as the BGL is acquired in exclusive
1603
    # mode
1604
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
1605
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1606
    self.all_node_info = self.cfg.GetAllNodesInfo()
1607
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1608
    self.needed_locks = {}
1609

    
1610
  def Exec(self, feedback_fn):
1611
    """Verify integrity of cluster, performing various test on nodes.
1612

1613
    """
1614
    self.bad = False
1615
    self._feedback_fn = feedback_fn
1616

    
1617
    feedback_fn("* Verifying cluster config")
1618

    
1619
    for msg in self.cfg.VerifyConfig():
1620
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)
1621

    
1622
    feedback_fn("* Verifying cluster certificate files")
1623

    
1624
    for cert_filename in constants.ALL_CERT_FILES:
1625
      (errcode, msg) = _VerifyCertificate(cert_filename)
1626
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)
1627

    
1628
    feedback_fn("* Verifying hypervisor parameters")
1629

    
1630
    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1631
                                                self.all_inst_info.values()))
1632

    
1633
    feedback_fn("* Verifying all nodes belong to an existing group")
1634

    
1635
    # We do this verification here because, should this bogus circumstance
1636
    # occur, it would never be caught by VerifyGroup, which only acts on
1637
    # nodes/instances reachable from existing node groups.
1638

    
1639
    dangling_nodes = set(node.name for node in self.all_node_info.values()
1640
                         if node.group not in self.all_group_info)
1641

    
1642
    dangling_instances = {}
1643
    no_node_instances = []
1644

    
1645
    for inst in self.all_inst_info.values():
1646
      if inst.primary_node in dangling_nodes:
1647
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1648
      elif inst.primary_node not in self.all_node_info:
1649
        no_node_instances.append(inst.name)
1650

    
1651
    pretty_dangling = [
1652
        "%s (%s)" %
1653
        (node.name,
1654
         utils.CommaJoin(dangling_instances.get(node.name,
1655
                                                ["no instances"])))
1656
        for node in dangling_nodes]
1657

    
1658
    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
1659
                  None,
1660
                  "the following nodes (and their instances) belong to a non"
1661
                  " existing group: %s", utils.CommaJoin(pretty_dangling))
1662

    
1663
    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
1664
                  None,
1665
                  "the following instances have a non-existing primary-node:"
1666
                  " %s", utils.CommaJoin(no_node_instances))
1667

    
1668
    return not self.bad
1669

    
1670

    
1671
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1672
  """Verifies the status of a node group.
1673

1674
  """
1675
  HPATH = "cluster-verify"
1676
  HTYPE = constants.HTYPE_CLUSTER
1677
  REQ_BGL = False
1678

    
1679
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1680

    
1681
  class NodeImage(object):
1682
    """A class representing the logical and physical status of a node.
1683

1684
    @type name: string
1685
    @ivar name: the node name to which this object refers
1686
    @ivar volumes: a structure as returned from
1687
        L{ganeti.backend.GetVolumeList} (runtime)
1688
    @ivar instances: a list of running instances (runtime)
1689
    @ivar pinst: list of configured primary instances (config)
1690
    @ivar sinst: list of configured secondary instances (config)
1691
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1692
        instances for which this node is secondary (config)
1693
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1694
    @ivar dfree: free disk, as reported by the node (runtime)
1695
    @ivar offline: the offline status (config)
1696
    @type rpc_fail: boolean
1697
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1698
        not whether the individual keys were correct) (runtime)
1699
    @type lvm_fail: boolean
1700
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1701
    @type hyp_fail: boolean
1702
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1703
    @type ghost: boolean
1704
    @ivar ghost: whether this is a known node or not (config)
1705
    @type os_fail: boolean
1706
    @ivar os_fail: whether the RPC call didn't return valid OS data
1707
    @type oslist: list
1708
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1709
    @type vm_capable: boolean
1710
    @ivar vm_capable: whether the node can host instances
1711

1712
    """
1713
    def __init__(self, offline=False, name=None, vm_capable=True):
1714
      self.name = name
1715
      self.volumes = {}
1716
      self.instances = []
1717
      self.pinst = []
1718
      self.sinst = []
1719
      self.sbp = {}
1720
      self.mfree = 0
1721
      self.dfree = 0
1722
      self.offline = offline
1723
      self.vm_capable = vm_capable
1724
      self.rpc_fail = False
1725
      self.lvm_fail = False
1726
      self.hyp_fail = False
1727
      self.ghost = False
1728
      self.os_fail = False
1729
      self.oslist = {}
1730

    
1731
  def ExpandNames(self):
1732
    # This raises errors.OpPrereqError on its own:
1733
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1734

    
1735
    # Get instances in node group; this is unsafe and needs verification later
1736
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1737

    
1738
    self.needed_locks = {
1739
      locking.LEVEL_INSTANCE: inst_names,
1740
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1741
      locking.LEVEL_NODE: [],
1742
      }
1743

    
1744
    self.share_locks = _ShareAll()
1745

    
1746
  def DeclareLocks(self, level):
1747
    if level == locking.LEVEL_NODE:
1748
      # Get members of node group; this is unsafe and needs verification later
1749
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1750

    
1751
      all_inst_info = self.cfg.GetAllInstancesInfo()
1752

    
1753
      # In Exec(), we warn about mirrored instances that have primary and
1754
      # secondary living in separate node groups. To fully verify that
1755
      # volumes for these instances are healthy, we will need to do an
1756
      # extra call to their secondaries. We ensure here those nodes will
1757
      # be locked.
1758
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1759
        # Important: access only the instances whose lock is owned
1760
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1761
          nodes.update(all_inst_info[inst].secondary_nodes)
1762

    
1763
      self.needed_locks[locking.LEVEL_NODE] = nodes
1764

    
1765
  def CheckPrereq(self):
1766
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1767
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1768

    
1769
    group_nodes = set(self.group_info.members)
1770
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1771

    
1772
    unlocked_nodes = \
1773
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1774

    
1775
    unlocked_instances = \
1776
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1777

    
1778
    if unlocked_nodes:
1779
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1780
                                 utils.CommaJoin(unlocked_nodes))
1781

    
1782
    if unlocked_instances:
1783
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1784
                                 utils.CommaJoin(unlocked_instances))
1785

    
1786
    self.all_node_info = self.cfg.GetAllNodesInfo()
1787
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1788

    
1789
    self.my_node_names = utils.NiceSort(group_nodes)
1790
    self.my_inst_names = utils.NiceSort(group_instances)
1791

    
1792
    self.my_node_info = dict((name, self.all_node_info[name])
1793
                             for name in self.my_node_names)
1794

    
1795
    self.my_inst_info = dict((name, self.all_inst_info[name])
1796
                             for name in self.my_inst_names)
1797

    
1798
    # We detect here the nodes that will need the extra RPC calls for verifying
1799
    # split LV volumes; they should be locked.
1800
    extra_lv_nodes = set()
1801

    
1802
    for inst in self.my_inst_info.values():
1803
      if inst.disk_template in constants.DTS_INT_MIRROR:
1804
        group = self.my_node_info[inst.primary_node].group
1805
        for nname in inst.secondary_nodes:
1806
          if self.all_node_info[nname].group != group:
1807
            extra_lv_nodes.add(nname)
1808

    
1809
    unlocked_lv_nodes = \
1810
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1811

    
1812
    if unlocked_lv_nodes:
1813
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1814
                                 utils.CommaJoin(unlocked_lv_nodes))
1815
    self.extra_lv_nodes = list(extra_lv_nodes)
1816

    
1817
  def _VerifyNode(self, ninfo, nresult):
1818
    """Perform some basic validation on data returned from a node.
1819

1820
      - check the result data structure is well formed and has all the
1821
        mandatory fields
1822
      - check ganeti version
1823

1824
    @type ninfo: L{objects.Node}
1825
    @param ninfo: the node to check
1826
    @param nresult: the results from the node
1827
    @rtype: boolean
1828
    @return: whether overall this call was successful (and we can expect
1829
         reasonable values in the response)
1830

1831
    """
1832
    node = ninfo.name
1833
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1834

    
1835
    # main result, nresult should be a non-empty dict
1836
    test = not nresult or not isinstance(nresult, dict)
1837
    _ErrorIf(test, constants.CV_ENODERPC, node,
1838
                  "unable to verify node: no data returned")
1839
    if test:
1840
      return False
1841

    
1842
    # compares ganeti version
1843
    local_version = constants.PROTOCOL_VERSION
1844
    remote_version = nresult.get("version", None)
1845
    test = not (remote_version and
1846
                isinstance(remote_version, (list, tuple)) and
1847
                len(remote_version) == 2)
1848
    _ErrorIf(test, constants.CV_ENODERPC, node,
1849
             "connection to node returned invalid data")
1850
    if test:
1851
      return False
1852

    
1853
    test = local_version != remote_version[0]
1854
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1855
             "incompatible protocol versions: master %s,"
1856
             " node %s", local_version, remote_version[0])
1857
    if test:
1858
      return False
1859

    
1860
    # node seems compatible, we can actually try to look into its results
1861

    
1862
    # full package version
1863
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1864
                  constants.CV_ENODEVERSION, node,
1865
                  "software version mismatch: master %s, node %s",
1866
                  constants.RELEASE_VERSION, remote_version[1],
1867
                  code=self.ETYPE_WARNING)
1868

    
1869
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1870
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1871
      for hv_name, hv_result in hyp_result.iteritems():
1872
        test = hv_result is not None
1873
        _ErrorIf(test, constants.CV_ENODEHV, node,
1874
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1875

    
1876
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1877
    if ninfo.vm_capable and isinstance(hvp_result, list):
1878
      for item, hv_name, hv_result in hvp_result:
1879
        _ErrorIf(True, constants.CV_ENODEHV, node,
1880
                 "hypervisor %s parameter verify failure (source %s): %s",
1881
                 hv_name, item, hv_result)
1882

    
1883
    test = nresult.get(constants.NV_NODESETUP,
1884
                       ["Missing NODESETUP results"])
1885
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1886
             "; ".join(test))
1887

    
1888
    return True
1889

    
1890
  def _VerifyNodeTime(self, ninfo, nresult,
1891
                      nvinfo_starttime, nvinfo_endtime):
1892
    """Check the node time.
1893

1894
    @type ninfo: L{objects.Node}
1895
    @param ninfo: the node to check
1896
    @param nresult: the remote results for the node
1897
    @param nvinfo_starttime: the start time of the RPC call
1898
    @param nvinfo_endtime: the end time of the RPC call
1899

1900
    """
1901
    node = ninfo.name
1902
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1903

    
1904
    ntime = nresult.get(constants.NV_TIME, None)
1905
    try:
1906
      ntime_merged = utils.MergeTime(ntime)
1907
    except (ValueError, TypeError):
1908
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1909
      return
1910

    
1911
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1912
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1913
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1914
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1915
    else:
1916
      ntime_diff = None
1917

    
1918
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1919
             "Node time diverges by at least %s from master node time",
1920
             ntime_diff)
1921

    
1922
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1923
    """Check the node LVM results.
1924

1925
    @type ninfo: L{objects.Node}
1926
    @param ninfo: the node to check
1927
    @param nresult: the remote results for the node
1928
    @param vg_name: the configured VG name
1929

1930
    """
1931
    if vg_name is None:
1932
      return
1933

    
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936

    
1937
    # checks vg existence and size > 20G
1938
    vglist = nresult.get(constants.NV_VGLIST, None)
1939
    test = not vglist
1940
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1941
    if not test:
1942
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1943
                                            constants.MIN_VG_SIZE)
1944
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1945

    
1946
    # check pv names
1947
    pvlist = nresult.get(constants.NV_PVLIST, None)
1948
    test = pvlist is None
1949
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1950
    if not test:
1951
      # check that ':' is not present in PV names, since it's a
1952
      # special character for lvcreate (denotes the range of PEs to
1953
      # use on the PV)
1954
      for _, pvname, owner_vg in pvlist:
1955
        test = ":" in pvname
1956
        _ErrorIf(test, constants.CV_ENODELVM, node,
1957
                 "Invalid character ':' in PV '%s' of VG '%s'",
1958
                 pvname, owner_vg)
1959

    
1960
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1961
    """Check the node bridges.
1962

1963
    @type ninfo: L{objects.Node}
1964
    @param ninfo: the node to check
1965
    @param nresult: the remote results for the node
1966
    @param bridges: the expected list of bridges
1967

1968
    """
1969
    if not bridges:
1970
      return
1971

    
1972
    node = ninfo.name
1973
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1974

    
1975
    missing = nresult.get(constants.NV_BRIDGES, None)
1976
    test = not isinstance(missing, list)
1977
    _ErrorIf(test, constants.CV_ENODENET, node,
1978
             "did not return valid bridge information")
1979
    if not test:
1980
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1981
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1982

    
1983
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1984
    """Check the results of user scripts presence and executability on the node
1985

1986
    @type ninfo: L{objects.Node}
1987
    @param ninfo: the node to check
1988
    @param nresult: the remote results for the node
1989

1990
    """
1991
    node = ninfo.name
1992

    
1993
    test = not constants.NV_USERSCRIPTS in nresult
1994
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1995
                  "did not return user scripts information")
1996

    
1997
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1998
    if not test:
1999
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
2000
                    "user scripts not present or not executable: %s" %
2001
                    utils.CommaJoin(sorted(broken_scripts)))
2002

    
2003
  def _VerifyNodeNetwork(self, ninfo, nresult):
2004
    """Check the node network connectivity results.
2005

2006
    @type ninfo: L{objects.Node}
2007
    @param ninfo: the node to check
2008
    @param nresult: the remote results for the node
2009

2010
    """
2011
    node = ninfo.name
2012
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2013

    
2014
    test = constants.NV_NODELIST not in nresult
2015
    _ErrorIf(test, constants.CV_ENODESSH, node,
2016
             "node hasn't returned node ssh connectivity data")
2017
    if not test:
2018
      if nresult[constants.NV_NODELIST]:
2019
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2020
          _ErrorIf(True, constants.CV_ENODESSH, node,
2021
                   "ssh communication with node '%s': %s", a_node, a_msg)
2022

    
2023
    test = constants.NV_NODENETTEST not in nresult
2024
    _ErrorIf(test, constants.CV_ENODENET, node,
2025
             "node hasn't returned node tcp connectivity data")
2026
    if not test:
2027
      if nresult[constants.NV_NODENETTEST]:
2028
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2029
        for anode in nlist:
2030
          _ErrorIf(True, constants.CV_ENODENET, node,
2031
                   "tcp communication with node '%s': %s",
2032
                   anode, nresult[constants.NV_NODENETTEST][anode])
2033

    
2034
    test = constants.NV_MASTERIP not in nresult
2035
    _ErrorIf(test, constants.CV_ENODENET, node,
2036
             "node hasn't returned node master IP reachability data")
2037
    if not test:
2038
      if not nresult[constants.NV_MASTERIP]:
2039
        if node == self.master_node:
2040
          msg = "the master node cannot reach the master IP (not configured?)"
2041
        else:
2042
          msg = "cannot reach the master IP"
2043
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2044

    
2045
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2046
                      diskstatus):
2047
    """Verify an instance.
2048

2049
    This function checks to see if the required block devices are
2050
    available on the instance's node.
2051

2052
    """
2053
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2054
    node_current = instanceconfig.primary_node
2055

    
2056
    node_vol_should = {}
2057
    instanceconfig.MapLVsByNode(node_vol_should)
2058

    
2059
    for node in node_vol_should:
2060
      n_img = node_image[node]
2061
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2062
        # ignore missing volumes on offline or broken nodes
2063
        continue
2064
      for volume in node_vol_should[node]:
2065
        test = volume not in n_img.volumes
2066
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2067
                 "volume %s missing on node %s", volume, node)
2068

    
2069
    if instanceconfig.admin_state == constants.ADMINST_UP:
2070
      pri_img = node_image[node_current]
2071
      test = instance not in pri_img.instances and not pri_img.offline
2072
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2073
               "instance not running on its primary node %s",
2074
               node_current)
2075

    
2076
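    # Flatten diskstatus ({node: [(success, status), ...]}) into
    # (node, success, status, disk index) tuples for easier iteration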
    diskdata = [(nname, success, status, idx)
2077
                for (nname, disks) in diskstatus.items()
2078
                for idx, (success, status) in enumerate(disks)]
2079

    
2080
    for nname, success, bdev_status, idx in diskdata:
2081
      # the 'ghost node' construction in Exec() ensures that we have a
2082
      # node here
2083
      snode = node_image[nname]
2084
      bad_snode = snode.ghost or snode.offline
2085
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2086
               not success and not bad_snode,
2087
               constants.CV_EINSTANCEFAULTYDISK, instance,
2088
               "couldn't retrieve status for disk/%s on %s: %s",
2089
               idx, nname, bdev_status)
2090
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2091
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2092
               constants.CV_EINSTANCEFAULTYDISK, instance,
2093
               "disk/%s on %s is faulty", idx, nname)
2094

    
2095
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2096
    """Verify if there are any unknown volumes in the cluster.
2097

2098
    The .os, .swap and backup volumes are ignored. All other volumes are
2099
    reported as unknown.
2100

2101
    @type reserved: L{ganeti.utils.FieldSet}
2102
    @param reserved: a FieldSet of reserved volume names
2103

2104
    """
2105
    for node, n_img in node_image.items():
2106
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2107
        # skip non-healthy nodes
2108
        continue
2109
      for volume in n_img.volumes:
2110
        test = ((node not in node_vol_should or
2111
                volume not in node_vol_should[node]) and
2112
                not reserved.Matches(volume))
2113
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2114
                      "volume %s is unknown", volume)
2115

    
2116
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2117
    """Verify N+1 Memory Resilience.
2118

2119
    Check that if one single node dies we can still start all the
2120
    instances it was primary for.
2121

2122
    """
2123
    cluster_info = self.cfg.GetClusterInfo()
2124
    for node, n_img in node_image.items():
2125
      # This code checks that every node which is now listed as
2126
      # secondary has enough memory to host all instances it is
2127
      # supposed to should a single other node in the cluster fail.
2128
      # FIXME: not ready for failover to an arbitrary node
2129
      # FIXME: does not support file-backed instances
2130
      # WARNING: we currently take into account down instances as well
2131
      # as up ones, considering that even if they're down someone
2132
      # might want to start them even in the event of a node failure.
2133
      if n_img.offline:
2134
        # we're skipping offline nodes from the N+1 warning, since
2135
        # most likely we don't have good memory information from them;
2136
        # we already list instances living on such nodes, and that's
2137
        # enough warning
2138
        continue
2139
      #TODO(dynmem): use MINMEM for checking
2140
      #TODO(dynmem): also consider ballooning out other instances
2141
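      # n_img.sbp maps each primary node to the instances that use this node
      # as a secondary (see NodeImage.sbp)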
      for prinode, instances in n_img.sbp.items():
2142
        needed_mem = 0
2143
        for instance in instances:
2144
          bep = cluster_info.FillBE(instance_cfg[instance])
2145
          if bep[constants.BE_AUTO_BALANCE]:
2146
            needed_mem += bep[constants.BE_MAXMEM]
2147
        test = n_img.mfree < needed_mem
2148
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2149
                      "not enough memory to accomodate instance failovers"
2150
                      " should node %s fail (%dMiB needed, %dMiB available)",
2151
                      prinode, needed_mem, n_img.mfree)
2152

    
2153
  @classmethod
2154
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2155
                   (files_all, files_opt, files_mc, files_vm)):
2156
    """Verifies file checksums collected from all nodes.
2157

2158
    @param errorif: Callback for reporting errors
2159
    @param nodeinfo: List of L{objects.Node} objects
2160
    @param master_node: Name of master node
2161
    @param all_nvinfo: RPC results
2162

2163
    """
2164
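    # files_all, files_opt, files_mc and files_vm are the file sets computed
    # by _ComputeAncillaryFiles: files expected on all nodes, optional files,
    # files for master candidates and files for vm_capable nodes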
    # Define functions determining which nodes to consider for a file
2165
    files2nodefn = [
2166
      (files_all, None),
2167
      (files_mc, lambda node: (node.master_candidate or
2168
                               node.name == master_node)),
2169
      (files_vm, lambda node: node.vm_capable),
2170
      ]
2171

    
2172
    # Build mapping from filename to list of nodes which should have the file
2173
    nodefiles = {}
2174
    for (files, fn) in files2nodefn:
2175
      if fn is None:
2176
        filenodes = nodeinfo
2177
      else:
2178
        filenodes = filter(fn, nodeinfo)
2179
      nodefiles.update((filename,
2180
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2181
                       for filename in files)
2182

    
2183
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2184

    
2185
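    # fileinfo: {filename: {checksum: set of node names with that checksum}}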
    fileinfo = dict((filename, {}) for filename in nodefiles)
2186
    ignore_nodes = set()
2187

    
2188
    for node in nodeinfo:
2189
      if node.offline:
2190
        ignore_nodes.add(node.name)
2191
        continue
2192

    
2193
      nresult = all_nvinfo[node.name]
2194

    
2195
      if nresult.fail_msg or not nresult.payload:
2196
        node_files = None
2197
      else:
2198
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2199

    
2200
      test = not (node_files and isinstance(node_files, dict))
2201
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2202
              "Node did not return file checksum data")
2203
      if test:
2204
        ignore_nodes.add(node.name)
2205
        continue
2206

    
2207
      # Build per-checksum mapping from filename to nodes having it
2208
      for (filename, checksum) in node_files.items():
2209
        assert filename in nodefiles
2210
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2211

    
2212
    for (filename, checksums) in fileinfo.items():
2213
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2214

    
2215
      # Nodes having the file
2216
      with_file = frozenset(node_name
2217
                            for nodes in fileinfo[filename].values()
2218
                            for node_name in nodes) - ignore_nodes
2219

    
2220
      expected_nodes = nodefiles[filename] - ignore_nodes
2221

    
2222
      # Nodes missing file
2223
      missing_file = expected_nodes - with_file
2224

    
2225
      if filename in files_opt:
2226
        # All or no nodes
2227
        errorif(missing_file and missing_file != expected_nodes,
2228
                constants.CV_ECLUSTERFILECHECK, None,
2229
                "File %s is optional, but it must exist on all or no"
2230
                " nodes (not found on %s)",
2231
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2232
      else:
2233
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2234
                "File %s is missing from node(s) %s", filename,
2235
                utils.CommaJoin(utils.NiceSort(missing_file)))
2236

    
2237
        # Warn if a node has a file it shouldn't
2238
        unexpected = with_file - expected_nodes
2239
        errorif(unexpected,
2240
                constants.CV_ECLUSTERFILECHECK, None,
2241
                "File %s should not exist on node(s) %s",
2242
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2243

    
2244
      # See if there are multiple versions of the file
2245
      test = len(checksums) > 1
2246
      if test:
2247
        variants = ["variant %s on %s" %
2248
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2249
                    for (idx, (checksum, nodes)) in
2250
                      enumerate(sorted(checksums.items()))]
2251
      else:
2252
        variants = []
2253

    
2254
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2255
              "File %s found with %s different checksums (%s)",
2256
              filename, len(checksums), "; ".join(variants))
2257

    
2258
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2259
                      drbd_map):
2260
    """Verifies and the node DRBD status.
2261

2262
    @type ninfo: L{objects.Node}
2263
    @param ninfo: the node to check
2264
    @param nresult: the remote results for the node
2265
    @param instanceinfo: the dict of instances
2266
    @param drbd_helper: the configured DRBD usermode helper
2267
    @param drbd_map: the DRBD map as returned by
2268
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2269

2270
    """
2271
    node = ninfo.name
2272
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2273

    
2274
    if drbd_helper:
2275
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2276
      test = (helper_result is None)
2277
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2278
               "no drbd usermode helper returned")
2279
      if helper_result:
2280
        status, payload = helper_result
2281
        test = not status
2282
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2283
                 "drbd usermode helper check unsuccessful: %s", payload)
2284
        test = status and (payload != drbd_helper)
2285
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2286
                 "wrong drbd usermode helper: %s", payload)
2287

    
2288
    # compute the DRBD minors
2289
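    # node_drbd: {minor: (instance name, whether the minor must be active)}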
    node_drbd = {}
2290
    for minor, instance in drbd_map[node].items():
2291
      test = instance not in instanceinfo
2292
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2293
               "ghost instance '%s' in temporary DRBD map", instance)
2294
        # ghost instance should not be running, but otherwise we
2295
        # don't give double warnings (both ghost instance and
2296
        # unallocated minor in use)
2297
      if test:
2298
        node_drbd[minor] = (instance, False)
2299
      else:
2300
        instance = instanceinfo[instance]
2301
        node_drbd[minor] = (instance.name,
2302
                            instance.admin_state == constants.ADMINST_UP)
2303

    
2304
    # and now check them
2305
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2306
    test = not isinstance(used_minors, (tuple, list))
2307
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2308
             "cannot parse drbd status file: %s", str(used_minors))
2309
    if test:
2310
      # we cannot check drbd status
2311
      return
2312

    
2313
    for minor, (iname, must_exist) in node_drbd.items():
2314
      test = minor not in used_minors and must_exist
2315
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2316
               "drbd minor %d of instance %s is not active", minor, iname)
2317
    for minor in used_minors:
2318
      test = minor not in node_drbd
2319
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2320
               "unallocated drbd minor %d is in use", minor)
2321

    
2322
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2323
    """Builds the node OS structures.
2324

2325
    @type ninfo: L{objects.Node}
2326
    @param ninfo: the node to check
2327
    @param nresult: the remote results for the node
2328
    @param nimg: the node image object
2329

2330
    """
2331
    node = ninfo.name
2332
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2333

    
2334
    remote_os = nresult.get(constants.NV_OSLIST, None)
2335
    test = (not isinstance(remote_os, list) or
2336
            not compat.all(isinstance(v, list) and len(v) == 7
2337
                           for v in remote_os))
2338

    
2339
    _ErrorIf(test, constants.CV_ENODEOS, node,
2340
             "node hasn't returned valid OS data")
2341

    
2342
    nimg.os_fail = test
2343

    
2344
    if test:
2345
      return
2346

    
2347
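    # os_dict: {os name: [(path, status, diagnose message, set of variants,
    #                      set of parameters, set of API versions)]}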
    os_dict = {}
2348

    
2349
    for (name, os_path, status, diagnose,
2350
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2351

    
2352
      if name not in os_dict:
2353
        os_dict[name] = []
2354

    
2355
      # parameters is a list of lists instead of list of tuples due to
2356
      # JSON lacking a real tuple type, fix it:
2357
      parameters = [tuple(v) for v in parameters]
2358
      os_dict[name].append((os_path, status, diagnose,
2359
                            set(variants), set(parameters), set(api_ver)))
2360

    
2361
    nimg.oslist = os_dict
2362

    
2363
  def _VerifyNodeOS(self, ninfo, nimg, base):
2364
    """Verifies the node OS list.
2365

2366
    @type ninfo: L{objects.Node}
2367
    @param ninfo: the node to check
2368
    @param nimg: the node image object
2369
    @param base: the 'template' node we match against (e.g. from the master)
2370

2371
    """
2372
    node = ninfo.name
2373
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2374

    
2375
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2376

    
2377
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2378
    for os_name, os_data in nimg.oslist.items():
2379
      assert os_data, "Empty OS status for OS %s?!" % os_name
2380
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2381
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2382
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2383
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2384
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2385
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2386
      # comparisons with the 'base' image
2387
      test = os_name not in base.oslist
2388
      _ErrorIf(test, constants.CV_ENODEOS, node,
2389
               "Extra OS %s not present on reference node (%s)",
2390
               os_name, base.name)
2391
      if test:
2392
        continue
2393
      assert base.oslist[os_name], "Base node has empty OS status?"
2394
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2395
      if not b_status:
2396
        # base OS is invalid, skipping
2397
        continue
2398
      for kind, a, b in [("API version", f_api, b_api),
2399
                         ("variants list", f_var, b_var),
2400
                         ("parameters", beautify_params(f_param),
2401
                          beautify_params(b_param))]:
2402
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2403
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2404
                 kind, os_name, base.name,
2405
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2406

    
2407
    # check any missing OSes
2408
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2409
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2410
             "OSes present on reference node %s but missing on this node: %s",
2411
             base.name, utils.CommaJoin(missing))
2412

    
2413
  def _VerifyOob(self, ninfo, nresult):
2414
    """Verifies out of band functionality of a node.
2415

2416
    @type ninfo: L{objects.Node}
2417
    @param ninfo: the node to check
2418
    @param nresult: the remote results for the node
2419

2420
    """
2421
    node = ninfo.name
2422
    # We just have to verify the paths on master and/or master candidates
2423
    # as the oob helper is invoked on the master
2424
    if ((ninfo.master_candidate or ninfo.master_capable) and
2425
        constants.NV_OOB_PATHS in nresult):
2426
      for path_result in nresult[constants.NV_OOB_PATHS]:
2427
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2428

    
2429
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2430
    """Verifies and updates the node volume data.
2431

2432
    This function will update a L{NodeImage}'s internal structures
2433
    with data from the remote call.
2434

2435
    @type ninfo: L{objects.Node}
2436
    @param ninfo: the node to check
2437
    @param nresult: the remote results for the node
2438
    @param nimg: the node image object
2439
    @param vg_name: the configured VG name
2440

2441
    """
2442
    node = ninfo.name
2443
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2444

    
2445
    nimg.lvm_fail = True
2446
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2447
    if vg_name is None:
2448
      pass
2449
    elif isinstance(lvdata, basestring):
2450
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2451
               utils.SafeEncode(lvdata))
2452
    elif not isinstance(lvdata, dict):
2453
      _ErrorIf(True, constants.CV_ENODELVM, node,
2454
               "rpc call to node failed (lvlist)")
2455
    else:
2456
      nimg.volumes = lvdata
2457
      nimg.lvm_fail = False
2458

    
2459
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2460
    """Verifies and updates the node instance list.
2461

2462
    If the listing was successful, then updates this node's instance
2463
    list. Otherwise, it marks the RPC call as failed for the instance
2464
    list key.
2465

2466
    @type ninfo: L{objects.Node}
2467
    @param ninfo: the node to check
2468
    @param nresult: the remote results for the node
2469
    @param nimg: the node image object
2470

2471
    """
2472
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2473
    test = not isinstance(idata, list)
2474
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2475
                  "rpc call to node failed (instancelist): %s",
2476
                  utils.SafeEncode(str(idata)))
2477
    if test:
2478
      nimg.hyp_fail = True
2479
    else:
2480
      nimg.instances = idata
2481

    
2482
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2483
    """Verifies and computes a node information map
2484

2485
    @type ninfo: L{objects.Node}
2486
    @param ninfo: the node to check
2487
    @param nresult: the remote results for the node
2488
    @param nimg: the node image object
2489
    @param vg_name: the configured VG name
2490

2491
    """
2492
    node = ninfo.name
2493
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2494

    
2495
    # try to read free memory (from the hypervisor)
2496
    hv_info = nresult.get(constants.NV_HVINFO, None)
2497
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2498
    _ErrorIf(test, constants.CV_ENODEHV, node,
2499
             "rpc call to node failed (hvinfo)")
2500
    if not test:
2501
      try:
2502
        nimg.mfree = int(hv_info["memory_free"])
2503
      except (ValueError, TypeError):
2504
        _ErrorIf(True, constants.CV_ENODERPC, node,
2505
                 "node returned invalid nodeinfo, check hypervisor")
2506

    
2507
    # FIXME: devise a free space model for file based instances as well
2508
    if vg_name is not None:
2509
      test = (constants.NV_VGLIST not in nresult or
2510
              vg_name not in nresult[constants.NV_VGLIST])
2511
      _ErrorIf(test, constants.CV_ENODELVM, node,
2512
               "node didn't return data for the volume group '%s'"
2513
               " - it is either missing or broken", vg_name)
2514
      if not test:
2515
        try:
2516
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2517
        except (ValueError, TypeError):
2518
          _ErrorIf(True, constants.CV_ENODERPC, node,
2519
                   "node returned invalid LVM info, check LVM status")
2520

    
2521
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2522
    """Gets per-disk status information for all instances.
2523

2524
    @type nodelist: list of strings
2525
    @param nodelist: Node names
2526
    @type node_image: dict of (name, L{objects.Node})
2527
    @param node_image: Node objects
2528
    @type instanceinfo: dict of (name, L{objects.Instance})
2529
    @param instanceinfo: Instance objects
2530
    @rtype: {instance: {node: [(success, payload)]}}
2531
    @return: a dictionary of per-instance dictionaries with nodes as
2532
        keys and disk information as values; the disk information is a
2533
        list of tuples (success, payload)
2534

2535
    """
2536
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2537

    
2538
    node_disks = {}
2539
    node_disks_devonly = {}
2540
    diskless_instances = set()
2541
    diskless = constants.DT_DISKLESS
2542

    
2543
    for nname in nodelist:
2544
      node_instances = list(itertools.chain(node_image[nname].pinst,
2545
                                            node_image[nname].sinst))
2546
      diskless_instances.update(inst for inst in node_instances
2547
                                if instanceinfo[inst].disk_template == diskless)
2548
      disks = [(inst, disk)
2549
               for inst in node_instances
2550
               for disk in instanceinfo[inst].disks]
2551

    
2552
      if not disks:
2553
        # No need to collect data
2554
        continue
2555

    
2556
      node_disks[nname] = disks
2557

    
2558
      # Creating copies as SetDiskID below will modify the objects and that can
2559
      # lead to incorrect data returned from nodes
2560
      devonly = [dev.Copy() for (_, dev) in disks]
2561

    
2562
      for dev in devonly:
2563
        self.cfg.SetDiskID(dev, nname)
2564

    
2565
      node_disks_devonly[nname] = devonly
2566

    
2567
    assert len(node_disks) == len(node_disks_devonly)
2568

    
2569
    # Collect data from all nodes with disks
2570
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2571
                                                          node_disks_devonly)
2572

    
2573
    assert len(result) == len(node_disks)
2574

    
2575
    instdisk = {}
2576

    
2577
    for (nname, nres) in result.items():
2578
      disks = node_disks[nname]
2579

    
2580
      if nres.offline:
2581
        # No data from this node
2582
        data = len(disks) * [(False, "node offline")]
2583
      else:
2584
        msg = nres.fail_msg
2585
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2586
                 "while getting disk information: %s", msg)
2587
        if msg:
2588
          # No data from this node
2589
          data = len(disks) * [(False, msg)]
2590
        else:
2591
          data = []
2592
          for idx, i in enumerate(nres.payload):
2593
            if isinstance(i, (tuple, list)) and len(i) == 2:
2594
              data.append(i)
2595
            else:
2596
              logging.warning("Invalid result from node %s, entry %d: %s",
2597
                              nname, idx, i)
2598
              data.append((False, "Invalid result from the remote node"))
2599

    
2600
      for ((inst, _), status) in zip(disks, data):
2601
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2602

    
2603
    # Add empty entries for diskless instances.
2604
    for inst in diskless_instances:
2605
      assert inst not in instdisk
2606
      instdisk[inst] = {}
2607

    
2608
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2609
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2610
                      compat.all(isinstance(s, (tuple, list)) and
2611
                                 len(s) == 2 for s in statuses)
2612
                      for inst, nnames in instdisk.items()
2613
                      for nname, statuses in nnames.items())
2614
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2615

    
2616
    return instdisk
2617

    
2618
  @staticmethod
2619
  def _SshNodeSelector(group_uuid, all_nodes):
2620
    """Create endless iterators for all potential SSH check hosts.
2621

2622
    """
2623
    nodes = [node for node in all_nodes
2624
             if (node.group != group_uuid and
2625
                 not node.offline)]
2626
    keyfunc = operator.attrgetter("group")
2627

    
2628
    return map(itertools.cycle,
2629
               [sorted(map(operator.attrgetter("name"), names))
2630
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2631
                                                  keyfunc)])
2632

    
2633
  @classmethod
2634
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2635
    """Choose which nodes should talk to which other nodes.
2636

2637
    We will make nodes contact all nodes in their group, and one node from
2638
    every other group.
2639

2640
    @warning: This algorithm has a known issue if one node group is much
2641
      smaller than others (e.g. just one node). In such a case all other
2642
      nodes will talk to the single node.
2643

2644
    """
2645
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2646
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2647

    
2648
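    # Result: (online nodes in this group, {node name: [one node name from
    # each other group]}). A hypothetical two-group cluster could e.g. yield
    # (["node1", "node2"], {"node1": ["nodeB1"], "node2": ["nodeB2"]});
    # the node names here are purely illustrative.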
    return (online_nodes,
2649
            dict((name, sorted([i.next() for i in sel]))
2650
                 for name in online_nodes))
2651

    
2652
  def BuildHooksEnv(self):
2653
    """Build hooks env.
2654

2655
    Cluster-Verify hooks are run in the post phase only; their failure is
    logged in the verify output and makes the verification fail.
2657

2658
    """
2659
    env = {
2660
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2661
      }
2662

    
2663
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2664
               for node in self.my_node_info.values())
2665

    
2666
    return env
2667

    
2668
  def BuildHooksNodes(self):
2669
    """Build hooks nodes.
2670

2671
    """
2672
    return ([], self.my_node_names)
2673

    
2674
  def Exec(self, feedback_fn):
2675
    """Verify integrity of the node group, performing various test on nodes.
2676

2677
    """
2678
    # This method has too many local variables. pylint: disable=R0914
2679
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2680

    
2681
    if not self.my_node_names:
2682
      # empty node group
2683
      feedback_fn("* Empty node group, skipping verification")
2684
      return True
2685

    
2686
    self.bad = False
2687
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2688
    verbose = self.op.verbose
2689
    self._feedback_fn = feedback_fn
2690

    
2691
    vg_name = self.cfg.GetVGName()
2692
    drbd_helper = self.cfg.GetDRBDHelper()
2693
    cluster = self.cfg.GetClusterInfo()
2694
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2695
    hypervisors = cluster.enabled_hypervisors
2696
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2697

    
2698
    i_non_redundant = [] # Non redundant instances
2699
    i_non_a_balanced = [] # Non auto-balanced instances
2700
    i_offline = 0 # Count of offline instances
2701
    n_offline = 0 # Count of offline nodes
2702
    n_drained = 0 # Count of nodes being drained
2703
    node_vol_should = {}
2704

    
2705
    # FIXME: verify OS list
2706

    
2707
    # File verification
2708
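    # filemap is the (files_all, files_opt, files_mc, files_vm) tuple that
    # _VerifyFiles consumes below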
    filemap = _ComputeAncillaryFiles(cluster, False)
2709

    
2710
    # do local checksums
2711
    master_node = self.master_node = self.cfg.GetMasterNode()
2712
    master_ip = self.cfg.GetMasterIP()
2713

    
2714
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2715

    
2716
    user_scripts = []
2717
    if self.cfg.GetUseExternalMipScript():
2718
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2719

    
2720
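    # Parameters for the node_verify RPC call; each NV_* key requests one
    # class of checks from the nodes and its value configures that check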
    node_verify_param = {
2721
      constants.NV_FILELIST:
2722
        utils.UniqueSequence(filename
2723
                             for files in filemap
2724
                             for filename in files),
2725
      constants.NV_NODELIST:
2726
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2727
                                  self.all_node_info.values()),
2728
      constants.NV_HYPERVISOR: hypervisors,
2729
      constants.NV_HVPARAMS:
2730
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2731
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2732
                                 for node in node_data_list
2733
                                 if not node.offline],
2734
      constants.NV_INSTANCELIST: hypervisors,
2735
      constants.NV_VERSION: None,
2736
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2737
      constants.NV_NODESETUP: None,
2738
      constants.NV_TIME: None,
2739
      constants.NV_MASTERIP: (master_node, master_ip),
2740
      constants.NV_OSLIST: None,
2741
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2742
      constants.NV_USERSCRIPTS: user_scripts,
2743
      }
2744

    
2745
    if vg_name is not None:
2746
      node_verify_param[constants.NV_VGLIST] = None
2747
      node_verify_param[constants.NV_LVLIST] = vg_name
2748
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2749
      node_verify_param[constants.NV_DRBDLIST] = None
2750

    
2751
    if drbd_helper:
2752
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2753

    
2754
    # bridge checks
2755
    # FIXME: this needs to be changed per node-group, not cluster-wide
2756
    bridges = set()
2757
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2758
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2759
      bridges.add(default_nicpp[constants.NIC_LINK])
2760
    for instance in self.my_inst_info.values():
2761
      for nic in instance.nics:
2762
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2763
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2764
          bridges.add(full_nic[constants.NIC_LINK])
2765

    
2766
    if bridges:
2767
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2768

    
2769
    # Build our expected cluster state
2770
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2771
                                                 name=node.name,
2772
                                                 vm_capable=node.vm_capable))
2773
                      for node in node_data_list)
2774

    
2775
    # Gather OOB paths
2776
    oob_paths = []
2777
    for node in self.all_node_info.values():
2778
      path = _SupportsOob(self.cfg, node)
2779
      if path and path not in oob_paths:
2780
        oob_paths.append(path)
2781

    
2782
    if oob_paths:
2783
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2784

    
2785
    for instance in self.my_inst_names:
2786
      inst_config = self.my_inst_info[instance]
2787

    
2788
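      # Nodes referenced by this instance but outside the current group get a
      # placeholder image; nodes missing from the cluster configuration
      # altogether are marked as "ghost" nodes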
      for nname in inst_config.all_nodes:
2789
        if nname not in node_image:
2790
          gnode = self.NodeImage(name=nname)
2791
          gnode.ghost = (nname not in self.all_node_info)
2792
          node_image[nname] = gnode
2793

    
2794
      inst_config.MapLVsByNode(node_vol_should)
2795

    
2796
      pnode = inst_config.primary_node
2797
      node_image[pnode].pinst.append(instance)
2798

    
2799
      for snode in inst_config.secondary_nodes:
2800
        nimg = node_image[snode]
2801
        nimg.sinst.append(instance)
2802
        if pnode not in nimg.sbp:
2803
          nimg.sbp[pnode] = []
2804
        nimg.sbp[pnode].append(instance)
2805

    
2806
    # At this point, we have the in-memory data structures complete,
2807
    # except for the runtime information, which we'll gather next
2808

    
2809
    # Due to the way our RPC system works, exact response times cannot be
2810
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2811
    # time before and after executing the request, we can at least have a time
2812
    # window.
2813
    nvinfo_starttime = time.time()
2814
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2815
                                           node_verify_param,
2816
                                           self.cfg.GetClusterName())
2817
    nvinfo_endtime = time.time()
2818

    
2819
    if self.extra_lv_nodes and vg_name is not None:
2820
      extra_lv_nvinfo = \
2821
          self.rpc.call_node_verify(self.extra_lv_nodes,
2822
                                    {constants.NV_LVLIST: vg_name},
2823
                                    self.cfg.GetClusterName())
2824
    else:
2825
      extra_lv_nvinfo = {}
2826

    
2827
    all_drbd_map = self.cfg.ComputeDRBDMap()
2828

    
2829
    feedback_fn("* Gathering disk information (%s nodes)" %
2830
                len(self.my_node_names))
2831
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2832
                                     self.my_inst_info)
2833

    
2834
    feedback_fn("* Verifying configuration file consistency")
2835

    
2836
    # If not all nodes are being checked, we need to make sure the master node
2837
    # and a non-checked vm_capable node are in the list.
2838
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2839
    if absent_nodes:
2840
      vf_nvinfo = all_nvinfo.copy()
2841
      vf_node_info = list(self.my_node_info.values())
2842
      additional_nodes = []
2843
      if master_node not in self.my_node_info:
2844
        additional_nodes.append(master_node)
2845
        vf_node_info.append(self.all_node_info[master_node])
2846
      # Add the first vm_capable node we find which is not included
2847
      for node in absent_nodes:
2848
        nodeinfo = self.all_node_info[node]
2849
        if nodeinfo.vm_capable and not nodeinfo.offline:
2850
          additional_nodes.append(node)
2851
          vf_node_info.append(self.all_node_info[node])
2852
          break
2853
      key = constants.NV_FILELIST
2854
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2855
                                                 {key: node_verify_param[key]},
2856
                                                 self.cfg.GetClusterName()))
2857
    else:
2858
      vf_nvinfo = all_nvinfo
2859
      vf_node_info = self.my_node_info.values()
2860

    
2861
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2862

    
2863
    feedback_fn("* Verifying node status")
2864

    
2865
    refos_img = None
2866

    
2867
    for node_i in node_data_list:
2868
      node = node_i.name
2869
      nimg = node_image[node]
2870

    
2871
      if node_i.offline:
2872
        if verbose:
2873
          feedback_fn("* Skipping offline node %s" % (node,))
2874
        n_offline += 1
2875
        continue
2876

    
2877
      if node == master_node:
2878
        ntype = "master"
2879
      elif node_i.master_candidate:
2880
        ntype = "master candidate"
2881
      elif node_i.drained:
2882
        ntype = "drained"
2883
        n_drained += 1
2884
      else:
2885
        ntype = "regular"
2886
      if verbose:
2887
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2888

    
2889
      msg = all_nvinfo[node].fail_msg
2890
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2891
               msg)
2892
      if msg:
2893
        nimg.rpc_fail = True
2894
        continue
2895

    
2896
      nresult = all_nvinfo[node].payload
2897

    
2898
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2899
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2900
      self._VerifyNodeNetwork(node_i, nresult)
2901
      self._VerifyNodeUserScripts(node_i, nresult)
2902
      self._VerifyOob(node_i, nresult)
2903

    
2904
      if nimg.vm_capable:
2905
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2906
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2907
                             all_drbd_map)
2908

    
2909
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2910
        self._UpdateNodeInstances(node_i, nresult, nimg)
2911
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2912
        self._UpdateNodeOS(node_i, nresult, nimg)
2913

    
2914
        if not nimg.os_fail:
2915
          if refos_img is None:
2916
            refos_img = nimg
2917
          self._VerifyNodeOS(node_i, nimg, refos_img)
2918
        self._VerifyNodeBridges(node_i, nresult, bridges)
2919

    
2920
        # Check whether all running instancies are primary for the node. (This
2921
        # can no longer be done from _VerifyInstance below, since some of the
2922
        # wrong instances could be from other node groups.)
2923
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2924

    
2925
        for inst in non_primary_inst:
2926
          # FIXME: investigate best way to handle offline insts
2927
          if inst.admin_state == constants.ADMINST_OFFLINE:
2928
            if verbose:
2929
              feedback_fn("* Skipping offline instance %s" % inst.name)
2930
            i_offline += 1
2931
            continue
2932
          test = inst in self.all_inst_info
2933
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2934
                   "instance should not run on node %s", node_i.name)
2935
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2936
                   "node is running unknown instance %s", inst)
2937

    
2938
    for node, result in extra_lv_nvinfo.items():
2939
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2940
                              node_image[node], vg_name)
2941

    
2942
    feedback_fn("* Verifying instance status")
2943
    for instance in self.my_inst_names:
2944
      if verbose:
2945
        feedback_fn("* Verifying instance %s" % instance)
2946
      inst_config = self.my_inst_info[instance]
2947
      self._VerifyInstance(instance, inst_config, node_image,
2948
                           instdisk[instance])
2949
      inst_nodes_offline = []
2950

    
2951
      pnode = inst_config.primary_node
2952
      pnode_img = node_image[pnode]
2953
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2954
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2955
               " primary node failed", instance)
2956

    
2957
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2958
               pnode_img.offline,
2959
               constants.CV_EINSTANCEBADNODE, instance,
2960
               "instance is marked as running and lives on offline node %s",
2961
               inst_config.primary_node)
2962

    
2963
      # If the instance is non-redundant we cannot survive losing its primary
2964
      # node, so we are not N+1 compliant. On the other hand we have no disk
2965
      # templates with more than one secondary so that situation is not well
2966
      # supported either.
2967
      # FIXME: does not support file-backed instances
2968
      if not inst_config.secondary_nodes:
2969
        i_non_redundant.append(instance)
2970

    
2971
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2972
               constants.CV_EINSTANCELAYOUT,
2973
               instance, "instance has multiple secondary nodes: %s",
2974
               utils.CommaJoin(inst_config.secondary_nodes),
2975
               code=self.ETYPE_WARNING)
2976

    
2977
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2978
        pnode = inst_config.primary_node
2979
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2980
        instance_groups = {}
2981

    
2982
        for node in instance_nodes:
2983
          instance_groups.setdefault(self.all_node_info[node].group,
2984
                                     []).append(node)
2985

    
2986
        pretty_list = [
2987
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2988
          # Sort so that we always list the primary node first.
2989
          for group, nodes in sorted(instance_groups.items(),
2990
                                     key=lambda (_, nodes): pnode in nodes,
2991
                                     reverse=True)]
2992

    
2993
        self._ErrorIf(len(instance_groups) > 1,
2994
                      constants.CV_EINSTANCESPLITGROUPS,
2995
                      instance, "instance has primary and secondary nodes in"
2996
                      " different groups: %s", utils.CommaJoin(pretty_list),
2997
                      code=self.ETYPE_WARNING)
2998

    
2999
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
3000
        i_non_a_balanced.append(instance)
3001

    
3002
      for snode in inst_config.secondary_nodes:
3003
        s_img = node_image[snode]
3004
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
3005
                 snode, "instance %s, connection to secondary node failed",
3006
                 instance)
3007

    
3008
        if s_img.offline:
3009
          inst_nodes_offline.append(snode)
3010

    
3011
      # warn that the instance lives on offline nodes
3012
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
3013
               "instance has offline secondary node(s) %s",
3014
               utils.CommaJoin(inst_nodes_offline))
3015
      # ... or ghost/non-vm_capable nodes
3016
      for node in inst_config.all_nodes:
3017
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3018
                 instance, "instance lives on ghost node %s", node)
3019
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3020
                 instance, "instance lives on non-vm_capable node %s", node)
3021

    
3022
    feedback_fn("* Verifying orphan volumes")
3023
    reserved = utils.FieldSet(*cluster.reserved_lvs)
3024

    
3025
    # We will get spurious "unknown volume" warnings if any node of this group
3026
    # is secondary for an instance whose primary is in another group. To avoid
3027
    # them, we find these instances and add their volumes to node_vol_should.
3028
    for inst in self.all_inst_info.values():
3029
      for secondary in inst.secondary_nodes:
3030
        if (secondary in self.my_node_info
3031
            and inst.name not in self.my_inst_info):
3032
          inst.MapLVsByNode(node_vol_should)
3033
          break
3034

    
3035
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3036

    
3037
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3038
      feedback_fn("* Verifying N+1 Memory redundancy")
3039
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3040

    
3041
    feedback_fn("* Other Notes")
3042
    if i_non_redundant:
3043
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3044
                  % len(i_non_redundant))
3045

    
3046
    if i_non_a_balanced:
3047
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3048
                  % len(i_non_a_balanced))
3049

    
3050
    if i_offline:
3051
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3052

    
3053
    if n_offline:
3054
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3055

    
3056
    if n_drained:
3057
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3058

    
3059
    return not self.bad
3060

    
3061
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
            for inst in self.instances.values()
            if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
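          # An LV that is offline but maps to a (supposedly running) instance
          # means that instance needs its disks re-activated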
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
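        # The reported size is in bytes; convert to MiB before comparing it
        # with the size recorded in the configuration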
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
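      # Always try to re-activate the master IP, whether or not the rename
      # above succeeded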
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

    if self.op.diskparams:
      for dt_params in self.op.diskparams.values():
        utils.ForceDictType(dt_params, constants.DISK_DT_TYPES)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # disk template parameters
    self.new_diskparams = objects.FillDict(cluster.diskparams, {})
    if self.op.diskparams:
      for dt_name, dt_params in self.op.diskparams.items():
        if dt_name not in self.new_diskparams:
          self.new_diskparams[dt_name] = dt_params
        else:
          self.new_diskparams[dt_name].update(dt_params)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams
    if self.op.diskparams:
      self.cluster.diskparams = self.new_diskparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
        feedback_fn("Note: CONFD was disabled at build time, node health"
                    " maintenance is not useful (still enabling it)")
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

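    # Helper applying the requested additions/removals to one of the
    # cluster-level OS lists (hidden or blacklisted)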
    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

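      # The new netmask is recorded in the configuration even if the RPC
      # above failed; the failure is only reported to the user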
      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

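    # Sleep for the estimated remaining sync time, but never more than a
    # minute between polls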
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

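  # Recurse into child devices as well (e.g. the volumes backing a DRBD disk)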
  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
4222
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4223
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4224
                                    " not marked offline") % node_name,
4225
                                   errors.ECODE_STATE)
4226

    
4227
  def Exec(self, feedback_fn):
4228
    """Execute OOB and return result if we expect any.
4229

4230
    """
4231
    master_node = self.master_node
4232
    ret = []
4233

    
4234
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4235
                                              key=lambda node: node.name)):
4236
      node_entry = [(constants.RS_NORMAL, node.name)]
4237
      ret.append(node_entry)
4238

    
4239
      oob_program = _SupportsOob(self.cfg, node)
4240

    
4241
      if not oob_program:
4242
        node_entry.append((constants.RS_UNAVAIL, None))
4243
        continue
4244

    
4245
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4246
                   self.op.command, oob_program, node.name)
4247
      result = self.rpc.call_run_oob(master_node, oob_program,
4248
                                     self.op.command, node.name,
4249
                                     self.op.timeout)
4250

    
4251
      if result.fail_msg:
4252
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4253
                        node.name, result.fail_msg)
4254
        node_entry.append((constants.RS_NODATA, None))
4255
      else:
4256
        try:
4257
          self._CheckPayload(result)
4258
        except errors.OpExecError, err:
4259
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4260
                          node.name, err)
4261
          node_entry.append((constants.RS_NODATA, None))
4262
        else:
4263
          if self.op.command == constants.OOB_HEALTH:
4264
            # For health we should log important events
4265
            for item, status in result.payload:
4266
              if status in [constants.OOB_STATUS_WARNING,
4267
                            constants.OOB_STATUS_CRITICAL]:
4268
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4269
                                item, node.name, status)
4270

    
4271
          if self.op.command == constants.OOB_POWER_ON:
4272
            node.powered = True
4273
          elif self.op.command == constants.OOB_POWER_OFF:
4274
            node.powered = False
4275
          elif self.op.command == constants.OOB_POWER_STATUS:
4276
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4277
            if powered != node.powered:
4278
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4279
                               " match actual power state (%s)"), node.powered,
4280
                              node.name, powered)
4281

    
4282
          # For configuration changing commands we should update the node
4283
          if self.op.command in (constants.OOB_POWER_ON,
4284
                                 constants.OOB_POWER_OFF):
4285
            self.cfg.Update(node, feedback_fn)
4286

    
4287
          node_entry.append((constants.RS_NORMAL, result.payload))
4288

    
4289
          if (self.op.command == constants.OOB_POWER_ON and
4290
              idx < len(self.nodes) - 1):
4291
            time.sleep(self.op.power_delay)
4292

    
4293
    return ret
4294

    
4295
  def _CheckPayload(self, result):
4296
    """Checks if the payload is valid.
4297

4298
    @param result: RPC result
4299
    @raises errors.OpExecError: If payload is not valid
4300

4301
    """
4302
    errs = []
4303
    if self.op.command == constants.OOB_HEALTH:
4304
      if not isinstance(result.payload, list):
4305
        errs.append("command 'health' is expected to return a list but got %s" %
4306
                    type(result.payload))
4307
      else:
4308
        for item, status in result.payload:
4309
          if status not in constants.OOB_STATUSES:
4310
            errs.append("health item '%s' has invalid status '%s'" %
4311
                        (item, status))
4312

    
4313
    if self.op.command == constants.OOB_POWER_STATUS:
4314
      if not isinstance(result.payload, dict):
4315
        errs.append("power-status is expected to return a dict but got %s" %
4316
                    type(result.payload))
4317

    
4318
    if self.op.command in [
4319
        constants.OOB_POWER_ON,
4320
        constants.OOB_POWER_OFF,
4321
        constants.OOB_POWER_CYCLE,
4322
        ]:
4323
      if result.payload is not None:
4324
        errs.append("%s is expected to not return payload but got '%s'" %
4325
                    (self.op.command, result.payload))
4326

    
4327
    if errs:
4328
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4329
                               utils.CommaJoin(errs))
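
# Editor's summary (sketch, not upstream code) of the payload shapes that
# _CheckPayload above accepts per OOB command:
#
#   OOB_HEALTH             -> list of (item, status) pairs, with status taken
#                             from constants.OOB_STATUSES
#   OOB_POWER_STATUS       -> dict containing OOB_POWER_STATUS_POWERED
#   OOB_POWER_ON/OFF/CYCLE -> no payload at all (None)
#
# A conforming power-status payload could therefore look like:
#
#   {constants.OOB_POWER_STATUS_POWERED: True}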


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-OS per-node dictionary.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
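
# Editor's sketch (hypothetical data, not upstream code): given the structure
# returned by _DiagnoseByOS, _GetQueryData only keeps variants/parameters/API
# versions common to all nodes.  For example:
#
#   os_data = {"node1": [("/srv/os", True, "", ["v1", "v2"], [], [10])],
#              "node2": [("/srv/os", True, "", ["v2"], [], [10])]}
#
# would yield an OsInfo with variants == ["v2"], because after the first
# entry each per-node variant set is intersected with the running result.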


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
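
# Editor's sketch (not upstream code): for a plain listing with
# fields=["name"] and no names given, _BuildFilter hides hidden, blacklisted
# and invalid OSes, producing roughly:
#
#   [qlang.OP_AND,
#    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
#    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
#    [qlang.OP_TRUE, "valid"]]
#
# As soon as one of those fields is explicitly requested, the corresponding
# clause is dropped and such OSes become visible in the output.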


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, [lu.cfg.GetVGName()],
                                        [lu.cfg.GetHypervisorType()])
      live_data = dict((name, _MakeLegacyNodeInfo(nresult.payload))
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
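
# Editor's note (sketch): LUNodeQuery is a thin wrapper; all field handling
# and locking decisions live in _NodeQuery above, and OldStyleQuery() renders
# the result in the legacy list-of-rows format, while the generic LUQuery
# below exposes the same data through NewStyleQuery().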


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
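
# Editor's sketch (hypothetical values): with output_fields = ["node", "name",
# "size", "instance"], each row appended to "output" above is a list of
# strings, e.g.:
#
#   ["node1.example.com", "disk0", "10240", "instance1.example.com"]
#
# where "-" is used in the instance column for volumes that are not mapped to
# any instance disk.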


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
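
# Editor's note (sketch, not upstream code): even if the caller does not ask
# for the "name" field, it is added to the RPC request above so the rows can
# be keyed and sorted by name; SF_NODE and SF_TYPE are never sent to the node
# because only this LU knows them and fills them in locally.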


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)
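
# Editor's sketch of the IQ_* flags handled above (not upstream code):
#
#   query.IQ_LIVE      -> call_all_instances_info RPC, fills live_data
#   query.IQ_DISKUSAGE -> per-instance _ComputeDiskSize, fills disk_usage
#   query.IQ_CONSOLE   -> _GetInstanceConsole for running instances
#   query.IQ_NODES     -> node and node group objects for all involved nodes
#
# Everything else is served from the configuration alone.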


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query the available fields for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
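
# Editor's sketch (illustrative call, not upstream code): query.QueryFields
# only describes the available fields, so for example
#
#   query.QueryFields(query.OS_FIELDS, ["name", "valid"])
#
# returns the field definitions without touching any node or acquiring any
# locks, which is why ExpandNames above needs no locks at all.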


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies the storage unit on the target node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
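
# Editor's summary of the Exec flow above (sketch, not upstream code):
#
#   1. mark the node as powered and, for re-adds, reset the
#      offline/drained/master-candidate flags
#   2. verify noded reachability and protocol version via call_version
#   3. optionally update /etc/hosts on the master
#   4. run NV_NODELIST verification from the master (ssh/hostname check)
#   5. re-add: ReaddNode plus config update; fresh add: redistribute
#      ancillary files and AddNode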
5324

    
5325

    
5326
class LUNodeSetParams(LogicalUnit):
5327
  """Modifies the parameters of a node.
5328

5329
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5330
      to the node role (as _ROLE_*)
5331
  @cvar _R2F: a dictionary from node role to tuples of flags
5332
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5333

5334
  """
5335
  HPATH = "node-modify"
5336
  HTYPE = constants.HTYPE_NODE
5337
  REQ_BGL = False
5338
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5339
  _F2R = {
5340
    (True, False, False): _ROLE_CANDIDATE,
5341
    (False, True, False): _ROLE_DRAINED,
5342
    (False, False, True): _ROLE_OFFLINE,
5343
    (False, False, False): _ROLE_REGULAR,
5344
    }
5345
  _R2F = dict((v, k) for k, v in _F2R.items())
5346
  _FLAGS = ["master_candidate", "drained", "offline"]
5347

    
5348
  def CheckArguments(self):
5349
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5350
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5351
                self.op.master_capable, self.op.vm_capable,
5352
                self.op.secondary_ip, self.op.ndparams]
5353
    if all_mods.count(None) == len(all_mods):
5354
      raise errors.OpPrereqError("Please pass at least one modification",
5355
                                 errors.ECODE_INVAL)
5356
    if all_mods.count(True) > 1:
5357
      raise errors.OpPrereqError("Can't set the node into more than one"
5358
                                 " state at the same time",
5359
                                 errors.ECODE_INVAL)
5360

    
5361
    # Boolean value that tells us whether we might be demoting from MC
5362
    self.might_demote = (self.op.master_candidate == False or
5363
                         self.op.offline == True or
5364
                         self.op.drained == True or
5365
                         self.op.master_capable == False)
5366

    
5367
    if self.op.secondary_ip:
5368
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5369
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5370
                                   " address" % self.op.secondary_ip,
5371
                                   errors.ECODE_INVAL)
5372

    
5373
    self.lock_all = self.op.auto_promote and self.might_demote
5374
    self.lock_instances = self.op.secondary_ip is not None
5375

    
5376
  def _InstanceFilter(self, instance):
5377
    """Filter for getting affected instances.
5378

5379
    """
5380
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5381
            self.op.node_name in instance.all_nodes)
5382

    
5383
  def ExpandNames(self):
5384
    if self.lock_all:
5385
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5386
    else:
5387
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5388

    
5389
    # Since modifying a node can have severe effects on currently running
5390
    # operations the resource lock is at least acquired in shared mode
5391
    self.needed_locks[locking.LEVEL_NODE_RES] = \
5392
      self.needed_locks[locking.LEVEL_NODE]
5393

    
5394
    # Get node resource and instance locks in shared mode; they are not used
5395
    # for anything but read-only access
5396
    self.share_locks[locking.LEVEL_NODE_RES] = 1
5397
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5398

    
5399
    if self.lock_instances:
5400
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5401
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5402

    
5403
  def BuildHooksEnv(self):
5404
    """Build hooks env.
5405

5406
    This runs on the master node.
5407

5408
    """
5409
    return {
5410
      "OP_TARGET": self.op.node_name,
5411
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5412
      "OFFLINE": str(self.op.offline),
5413
      "DRAINED": str(self.op.drained),
5414
      "MASTER_CAPABLE": str(self.op.master_capable),
5415
      "VM_CAPABLE": str(self.op.vm_capable),
5416
      }
5417

    
5418
  def BuildHooksNodes(self):
5419
    """Build hooks nodes.
5420

5421
    """
5422
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5423
    return (nl, nl)
5424

    
5425
  def CheckPrereq(self):
5426
    """Check prerequisites.
5427

5428
    This only checks the instance list against the existing names.
5429

5430
    """
5431
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5432

    
5433
    if self.lock_instances:
5434
      affected_instances = \
5435
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5436

    
5437
      # Verify instance locks
5438
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5439
      wanted_instances = frozenset(affected_instances.keys())
5440
      if wanted_instances - owned_instances:
5441
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5442
                                   " secondary IP address have changed since"
5443
                                   " locks were acquired, wanted '%s', have"
5444
                                   " '%s'; retry the operation" %
5445
                                   (self.op.node_name,
5446
                                    utils.CommaJoin(wanted_instances),
5447
                                    utils.CommaJoin(owned_instances)),
5448
                                   errors.ECODE_STATE)
5449
    else:
5450
      affected_instances = None
5451

    
5452
    if (self.op.master_candidate is not None or
5453
        self.op.drained is not None or
5454
        self.op.offline is not None):
5455
      # we can't change the master's node flags
5456
      if self.op.node_name == self.cfg.GetMasterNode():
5457
        raise errors.OpPrereqError("The master role can be changed"
5458
                                   " only via master-failover",
5459
                                   errors.ECODE_INVAL)
5460

    
5461
    if self.op.master_candidate and not node.master_capable:
5462
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5463
                                 " it a master candidate" % node.name,
5464
                                 errors.ECODE_STATE)
5465

    
5466
    if self.op.vm_capable == False:
5467
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5468
      if ipri or isec:
5469
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5470
                                   " the vm_capable flag" % node.name,
5471
                                   errors.ECODE_STATE)
5472

    
5473
    if node.master_candidate and self.might_demote and not self.lock_all:
5474
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5475
      # check if after removing the current node, we're missing master
5476
      # candidates
5477
      (mc_remaining, mc_should, _) = \
5478
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5479
      if mc_remaining < mc_should:
5480
        raise errors.OpPrereqError("Not enough master candidates, please"
5481
                                   " pass auto promote option to allow"
5482
                                   " promotion", errors.ECODE_STATE)
5483

    
5484
    self.old_flags = old_flags = (node.master_candidate,
5485
                                  node.drained, node.offline)
5486
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5487
    self.old_role = old_role = self._F2R[old_flags]
5488

    
5489
    # Check for ineffective changes
5490
    for attr in self._FLAGS:
5491
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5492
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5493
        setattr(self.op, attr, None)
5494

    
5495
    # Past this point, any flag change to False means a transition
5496
    # away from the respective state, as only real changes are kept
5497

    
5498
    # TODO: We might query the real power state if it supports OOB
5499
    if _SupportsOob(self.cfg, node):
5500
      if self.op.offline is False and not (node.powered or
5501
                                           self.op.powered == True):
5502
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5503
                                    " offline status can be reset") %
5504
                                   self.op.node_name)
5505
    elif self.op.powered is not None:
5506
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5507
                                  " as it does not support out-of-band"
5508
                                  " handling") % self.op.node_name)
5509

    
5510
    # If we're being deofflined/drained, we'll MC ourself if needed
5511
    if (self.op.drained == False or self.op.offline == False or
5512
        (self.op.master_capable and not node.master_capable)):
5513
      if _DecideSelfPromotion(self):
5514
        self.op.master_candidate = True
5515
        self.LogInfo("Auto-promoting node to master candidate")
5516

    
5517
    # If we're no longer master capable, we'll demote ourselves from MC
5518
    if self.op.master_capable == False and node.master_candidate:
5519
      self.LogInfo("Demoting from master candidate")
5520
      self.op.master_candidate = False
5521

    
5522
    # Compute new role
5523
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5524
    if self.op.master_candidate:
5525
      new_role = self._ROLE_CANDIDATE
5526
    elif self.op.drained:
5527
      new_role = self._ROLE_DRAINED
5528
    elif self.op.offline:
5529
      new_role = self._ROLE_OFFLINE
5530
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5531
      # False is still in new flags, which means we're un-setting (the
5532
      # only) True flag
5533
      new_role = self._ROLE_REGULAR
5534
    else: # no new flags, nothing, keep old role
5535
      new_role = old_role
5536

    
5537
    self.new_role = new_role
5538

    
5539
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5540
      # Trying to transition out of offline status
5541
      # TODO: Use standard RPC runner, but make sure it works when the node is
5542
      # still marked offline
5543
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5544
      if result.fail_msg:
5545
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5546
                                   " to report its version: %s" %
5547
                                   (node.name, result.fail_msg),
5548
                                   errors.ECODE_STATE)
5549
      else:
5550
        self.LogWarning("Transitioning node from offline to online state"
5551
                        " without using re-add. Please make sure the node"
5552
                        " is healthy!")
5553

    
5554
    if self.op.secondary_ip:
5555
      # Ok even without locking, because this can't be changed by any LU
5556
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5557
      master_singlehomed = master.secondary_ip == master.primary_ip
5558
      if master_singlehomed and self.op.secondary_ip:
5559
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5560
                                   " homed cluster", errors.ECODE_INVAL)
5561

    
5562
      assert not (frozenset(affected_instances) -
5563
                  self.owned_locks(locking.LEVEL_INSTANCE))
5564

    
5565
      if node.offline:
5566
        if affected_instances:
5567
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5568
                                     " offline node has instances (%s)"
5569
                                     " configured to use it" %
5570
                                     utils.CommaJoin(affected_instances.keys()))
5571
      else:
5572
        # On online nodes, check that no instances are running, and that
5573
        # the node has the new ip and we can reach it.
5574
        for instance in affected_instances.values():
5575
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
5576
                              msg="cannot change secondary ip")
5577

    
5578
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5579
        if master.name != node.name:
5580
          # check reachability from master secondary ip to new secondary ip
5581
          if not netutils.TcpPing(self.op.secondary_ip,
5582
                                  constants.DEFAULT_NODED_PORT,
5583
                                  source=master.secondary_ip):
5584
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5585
                                       " based ping to node daemon port",
5586
                                       errors.ECODE_ENVIRON)
5587

    
5588
    if self.op.ndparams:
5589
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5590
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5591
      self.new_ndparams = new_ndparams
5592

    
5593
  def Exec(self, feedback_fn):
5594
    """Modifies a node.
5595

5596
    """
5597
    node = self.node
5598
    old_role = self.old_role
5599
    new_role = self.new_role
5600

    
5601
    result = []
5602

    
5603
    if self.op.ndparams:
5604
      node.ndparams = self.new_ndparams
5605

    
5606
    if self.op.powered is not None:
5607
      node.powered = self.op.powered
5608

    
5609
    for attr in ["master_capable", "vm_capable"]:
5610
      val = getattr(self.op, attr)
5611
      if val is not None:
5612
        setattr(node, attr, val)
5613
        result.append((attr, str(val)))
5614

    
5615
    if new_role != old_role:
5616
      # Tell the node to demote itself, if no longer MC and not offline
5617
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5618
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5619
        if msg:
5620
          self.LogWarning("Node failed to demote itself: %s", msg)
5621

    
5622
      new_flags = self._R2F[new_role]
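      # (_R2F maps a role to the corresponding (master_candidate, drained,
      # offline) flag tuple that is assigned to the node object below)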
5623
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5624
        if of != nf:
5625
          result.append((desc, str(nf)))
5626
      (node.master_candidate, node.drained, node.offline) = new_flags
5627

    
5628
      # we locked all nodes, we adjust the CP before updating this node
5629
      if self.lock_all:
5630
        _AdjustCandidatePool(self, [node.name])
5631

    
5632
    if self.op.secondary_ip:
5633
      node.secondary_ip = self.op.secondary_ip
5634
      result.append(("secondary_ip", self.op.secondary_ip))
5635

    
5636
    # this will trigger configuration file update, if needed
5637
    self.cfg.Update(node, feedback_fn)
5638

    
5639
    # this will trigger job queue propagation or cleanup if the mc
5640
    # flag changed
5641
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5642
      self.context.ReaddNode(node)
5643

    
5644
    return result
5645

    
5646

    
5647
class LUNodePowercycle(NoHooksLU):
5648
  """Powercycles a node.
5649

5650
  """
5651
  REQ_BGL = False
5652

    
5653
  def CheckArguments(self):
5654
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5655
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5656
      raise errors.OpPrereqError("The node is the master and the force"
5657
                                 " parameter was not set",
5658
                                 errors.ECODE_INVAL)
5659

    
5660
  def ExpandNames(self):
5661
    """Locking for PowercycleNode.
5662

5663
    This is a last-resort option and shouldn't block on other
5664
    jobs. Therefore, we grab no locks.
5665

5666
    """
5667
    self.needed_locks = {}
5668

    
5669
  def Exec(self, feedback_fn):
5670
    """Reboots a node.
5671

5672
    """
5673
    result = self.rpc.call_node_powercycle(self.op.node_name,
5674
                                           self.cfg.GetHypervisorType())
5675
    result.Raise("Failed to schedule the reboot")
5676
    return result.payload
5677

    
5678

    
5679
class LUClusterQuery(NoHooksLU):
5680
  """Query cluster configuration.
5681

5682
  """
5683
  REQ_BGL = False
5684

    
5685
  def ExpandNames(self):
5686
    self.needed_locks = {}
5687

    
5688
  def Exec(self, feedback_fn):
5689
    """Return cluster config.
5690

5691
    """
5692
    cluster = self.cfg.GetClusterInfo()
5693
    os_hvp = {}
5694

    
5695
    # Filter just for enabled hypervisors
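    # (the resulting structure is {os_name: {hv_name: hv_params}}, limited
    # to hypervisors present in cluster.enabled_hypervisors)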
5696
    for os_name, hv_dict in cluster.os_hvp.items():
5697
      os_hvp[os_name] = {}
5698
      for hv_name, hv_params in hv_dict.items():
5699
        if hv_name in cluster.enabled_hypervisors:
5700
          os_hvp[os_name][hv_name] = hv_params
5701

    
5702
    # Convert ip_family to ip_version
5703
    primary_ip_version = constants.IP4_VERSION
5704
    if cluster.primary_ip_family == netutils.IP6Address.family:
5705
      primary_ip_version = constants.IP6_VERSION
5706

    
5707
    result = {
5708
      "software_version": constants.RELEASE_VERSION,
5709
      "protocol_version": constants.PROTOCOL_VERSION,
5710
      "config_version": constants.CONFIG_VERSION,
5711
      "os_api_version": max(constants.OS_API_VERSIONS),
5712
      "export_version": constants.EXPORT_VERSION,
5713
      "architecture": (platform.architecture()[0], platform.machine()),
5714
      "name": cluster.cluster_name,
5715
      "master": cluster.master_node,
5716
      "default_hypervisor": cluster.enabled_hypervisors[0],
5717
      "enabled_hypervisors": cluster.enabled_hypervisors,
5718
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5719
                        for hypervisor_name in cluster.enabled_hypervisors]),
5720
      "os_hvp": os_hvp,
5721
      "beparams": cluster.beparams,
5722
      "osparams": cluster.osparams,
5723
      "nicparams": cluster.nicparams,
5724
      "ndparams": cluster.ndparams,
5725
      "candidate_pool_size": cluster.candidate_pool_size,
5726
      "master_netdev": cluster.master_netdev,
5727
      "master_netmask": cluster.master_netmask,
5728
      "use_external_mip_script": cluster.use_external_mip_script,
5729
      "volume_group_name": cluster.volume_group_name,
5730
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5731
      "file_storage_dir": cluster.file_storage_dir,
5732
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5733
      "maintain_node_health": cluster.maintain_node_health,
5734
      "ctime": cluster.ctime,
5735
      "mtime": cluster.mtime,
5736
      "uuid": cluster.uuid,
5737
      "tags": list(cluster.GetTags()),
5738
      "uid_pool": cluster.uid_pool,
5739
      "default_iallocator": cluster.default_iallocator,
5740
      "reserved_lvs": cluster.reserved_lvs,
5741
      "primary_ip_version": primary_ip_version,
5742
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5743
      "hidden_os": cluster.hidden_os,
5744
      "blacklisted_os": cluster.blacklisted_os,
5745
      }
5746

    
5747
    return result
5748

    
5749

    
5750
class LUClusterConfigQuery(NoHooksLU):
5751
  """Return configuration values.
5752

5753
  """
5754
  REQ_BGL = False
5755
  _FIELDS_DYNAMIC = utils.FieldSet()
5756
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5757
                                  "watcher_pause", "volume_group_name")
5758

    
5759
  def CheckArguments(self):
5760
    _CheckOutputFields(static=self._FIELDS_STATIC,
5761
                       dynamic=self._FIELDS_DYNAMIC,
5762
                       selected=self.op.output_fields)
5763

    
5764
  def ExpandNames(self):
5765
    self.needed_locks = {}
5766

    
5767
  def Exec(self, feedback_fn):
5768
    """Dump a representation of the cluster config to the standard output.
5769

5770
    """
5771
    values = []
5772
    for field in self.op.output_fields:
5773
      if field == "cluster_name":
5774
        entry = self.cfg.GetClusterName()
5775
      elif field == "master_node":
5776
        entry = self.cfg.GetMasterNode()
5777
      elif field == "drain_flag":
5778
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5779
      elif field == "watcher_pause":
5780
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5781
      elif field == "volume_group_name":
5782
        entry = self.cfg.GetVGName()
5783
      else:
5784
        raise errors.ParameterError(field)
5785
      values.append(entry)
5786
    return values
5787

    
5788

    
5789
class LUInstanceActivateDisks(NoHooksLU):
5790
  """Bring up an instance's disks.
5791

5792
  """
5793
  REQ_BGL = False
5794

    
5795
  def ExpandNames(self):
5796
    self._ExpandAndLockInstance()
5797
    self.needed_locks[locking.LEVEL_NODE] = []
5798
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5799

    
5800
  def DeclareLocks(self, level):
5801
    if level == locking.LEVEL_NODE:
5802
      self._LockInstancesNodes()
5803

    
5804
  def CheckPrereq(self):
5805
    """Check prerequisites.
5806

5807
    This checks that the instance is in the cluster.
5808

5809
    """
5810
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5811
    assert self.instance is not None, \
5812
      "Cannot retrieve locked instance %s" % self.op.instance_name
5813
    _CheckNodeOnline(self, self.instance.primary_node)
5814

    
5815
  def Exec(self, feedback_fn):
5816
    """Activate the disks.
5817

5818
    """
5819
    disks_ok, disks_info = \
5820
              _AssembleInstanceDisks(self, self.instance,
5821
                                     ignore_size=self.op.ignore_size)
5822
    if not disks_ok:
5823
      raise errors.OpExecError("Cannot activate block devices")
5824

    
5825
    return disks_info
5826

    
5827

    
5828
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5829
                           ignore_size=False):
5830
  """Prepare the block devices for an instance.
5831

5832
  This sets up the block devices on all nodes.
5833

5834
  @type lu: L{LogicalUnit}
5835
  @param lu: the logical unit on whose behalf we execute
5836
  @type instance: L{objects.Instance}
5837
  @param instance: the instance for whose disks we assemble
5838
  @type disks: list of L{objects.Disk} or None
5839
  @param disks: which disks to assemble (or all, if None)
5840
  @type ignore_secondaries: boolean
5841
  @param ignore_secondaries: if true, errors on secondary nodes
5842
      won't result in an error return from the function
5843
  @type ignore_size: boolean
5844
  @param ignore_size: if true, the current known size of the disk
5845
      will not be used during the disk activation, useful for cases
5846
      when the size is wrong
5847
  @return: a tuple of (disks_ok, device_info); disks_ok is False if the
      operation failed on any node, and device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples with the
      mapping from node devices to instance devices

  """
5852
  device_info = []
5853
  disks_ok = True
5854
  iname = instance.name
5855
  disks = _ExpandCheckDisks(instance, disks)
5856

    
5857
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it
5860

    
5861
  # The proper fix would be to wait (with some limits) until the
5862
  # connection has been made and drbd transitions from WFConnection
5863
  # into any other network-connected state (Connected, SyncTarget,
5864
  # SyncSource, etc.)
5865

    
5866
  # 1st pass, assemble on all nodes in secondary mode
5867
  for idx, inst_disk in enumerate(disks):
5868
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5869
      if ignore_size:
5870
        node_disk = node_disk.Copy()
5871
        node_disk.UnsetSize()
5872
      lu.cfg.SetDiskID(node_disk, node)
5873
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5874
      msg = result.fail_msg
5875
      if msg:
5876
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5877
                           " (is_primary=False, pass=1): %s",
5878
                           inst_disk.iv_name, node, msg)
5879
        if not ignore_secondaries:
5880
          disks_ok = False
5881

    
5882
  # FIXME: race condition on drbd migration to primary
5883

    
5884
  # 2nd pass, do only the primary node
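  # (the fourth argument to call_blockdev_assemble below is what marks the
  # device as primary; it was False in the first pass and is True here)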
5885
  for idx, inst_disk in enumerate(disks):
5886
    dev_path = None
5887

    
5888
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5889
      if node != instance.primary_node:
5890
        continue
5891
      if ignore_size:
5892
        node_disk = node_disk.Copy()
5893
        node_disk.UnsetSize()
5894
      lu.cfg.SetDiskID(node_disk, node)
5895
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5896
      msg = result.fail_msg
5897
      if msg:
5898
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5899
                           " (is_primary=True, pass=2): %s",
5900
                           inst_disk.iv_name, node, msg)
5901
        disks_ok = False
5902
      else:
5903
        dev_path = result.payload
5904

    
5905
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
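    # (dev_path stays None here if assembling the disk on the primary node
    # failed above)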
5906

    
5907
  # leave the disks configured for the primary node
5908
  # this is a workaround that would be fixed better by
5909
  # improving the logical/physical id handling
5910
  for disk in disks:
5911
    lu.cfg.SetDiskID(disk, instance.primary_node)
5912

    
5913
  return disks_ok, device_info
5914

    
5915

    
5916
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
5932
  """Shutdown an instance's disks.
5933

5934
  """
5935
  REQ_BGL = False
5936

    
5937
  def ExpandNames(self):
5938
    self._ExpandAndLockInstance()
5939
    self.needed_locks[locking.LEVEL_NODE] = []
5940
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5941

    
5942
  def DeclareLocks(self, level):
5943
    if level == locking.LEVEL_NODE:
5944
      self._LockInstancesNodes()
5945

    
5946
  def CheckPrereq(self):
5947
    """Check prerequisites.
5948

5949
    This checks that the instance is in the cluster.
5950

5951
    """
5952
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5953
    assert self.instance is not None, \
5954
      "Cannot retrieve locked instance %s" % self.op.instance_name
5955

    
5956
  def Exec(self, feedback_fn):
5957
    """Deactivate the disks
5958

5959
    """
5960
    instance = self.instance
5961
    if self.op.force:
5962
      _ShutdownInstanceDisks(self, instance)
5963
    else:
5964
      _SafeShutdownInstanceDisks(self, instance)
5965

    
5966

    
5967
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is down before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks (or None, meaning all of the instance's disks)
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5997
  """Shutdown block devices of an instance.
5998

5999
  This does the shutdown on all nodes of the instance.
6000

6001
  Errors on the primary node are ignored only if ignore_primary is
  true; errors on secondary nodes are ignored when the node is marked
  offline.
6003

6004
  """
6005
  all_result = True
6006
  disks = _ExpandCheckDisks(instance, disks)
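  # Failures on the primary node count against the result unless
  # ignore_primary is set; failures on secondary nodes only count when the
  # node is not offline (offline secondaries merely produce a warning).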
6007

    
6008
  for disk in disks:
6009
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
6010
      lu.cfg.SetDiskID(top_disk, node)
6011
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
6012
      msg = result.fail_msg
6013
      if msg:
6014
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
6015
                      disk.iv_name, node, msg)
6016
        if ((node == instance.primary_node and not ignore_primary) or
6017
            (node != instance.primary_node and not result.offline)):
6018
          all_result = False
6019
  return all_result
6020

    
6021

    
6022
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
6023
  """Checks if a node has enough free memory.
6024

6025
  This function checks whether a given node has the needed amount of free
  memory. If the node has less memory, or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6029

6030
  @type lu: C{LogicalUnit}
6031
  @param lu: a logical unit from which we get configuration data
6032
  @type node: C{str}
6033
  @param node: the node to check
6034
  @type reason: C{str}
6035
  @param reason: string to use in the error message
6036
  @type requested: C{int}
6037
  @param requested: the amount of memory in MiB to check for
6038
  @type hypervisor_name: C{str}
6039
  @param hypervisor_name: the hypervisor to ask for memory stats
6040
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6041
      we cannot check the node
6042

6043
  """
6044
  nodeinfo = lu.rpc.call_node_info([node], None, [hypervisor_name])
6045
  nodeinfo[node].Raise("Can't get data from node %s" % node,
6046
                       prereq=True, ecode=errors.ECODE_ENVIRON)
6047
  (_, _, (hv_info, )) = nodeinfo[node].payload
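  # (the third element of the payload carries the per-hypervisor data
  # requested via call_node_info above)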
6048

    
6049
  free_mem = hv_info.get("memory_free", None)
6050
  if not isinstance(free_mem, int):
6051
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6052
                               " was '%s'" % (node, free_mem),
6053
                               errors.ECODE_ENVIRON)
6054
  if requested > free_mem:
6055
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6056
                               " needed %s MiB, available %s MiB" %
6057
                               (node, reason, requested, free_mem),
6058
                               errors.ECODE_NORES)
6059

    
6060

    
6061
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6062
  """Checks if nodes have enough free disk space in the all VGs.
6063

6064
  This function check if all given nodes have the needed amount of
6065
  free disk. In case any node has less disk or we cannot get the
6066
  information from the node, this function raise an OpPrereqError
6067
  exception.
6068

6069
  @type lu: C{LogicalUnit}
6070
  @param lu: a logical unit from which we get configuration data
6071
  @type nodenames: C{list}
6072
  @param nodenames: the list of node names to check
6073
  @type req_sizes: C{dict}
6074
  @param req_sizes: the hash of vg and corresponding amount of disk in
6075
      MiB to check for
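      (for example, mapping "xenvg" to 1024 requires 1024 MiB to be free
      in volume group "xenvg"; the VG name is purely illustrative)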
6076
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6077
      or we cannot check the node
6078

6079
  """
6080
  for vg, req_size in req_sizes.items():
6081
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6082

    
6083

    
6084
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6085
  """Checks if nodes have enough free disk space in the specified VG.
6086

6087
  This function checks whether all given nodes have the needed amount of
  free disk space. If any node has less disk space, or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6091

6092
  @type lu: C{LogicalUnit}
6093
  @param lu: a logical unit from which we get configuration data
6094
  @type nodenames: C{list}
6095
  @param nodenames: the list of node names to check
6096
  @type vg: C{str}
6097
  @param vg: the volume group to check
6098
  @type requested: C{int}
6099
  @param requested: the amount of disk in MiB to check for
6100
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6101
      or we cannot check the node
6102

6103
  """
6104
  nodeinfo = lu.rpc.call_node_info(nodenames, [vg], None)
6105
  for node in nodenames:
6106
    info = nodeinfo[node]
6107
    info.Raise("Cannot get current information from node %s" % node,
6108
               prereq=True, ecode=errors.ECODE_ENVIRON)
6109
    (_, (vg_info, ), _) = info.payload
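    # (the second element of the payload carries the per-VG data requested
    # via call_node_info above)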
6110
    vg_free = vg_info.get("vg_free", None)
6111
    if not isinstance(vg_free, int):
6112
      raise errors.OpPrereqError("Can't compute free disk space on node"
6113
                                 " %s for vg %s, result was '%s'" %
6114
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6115
    if requested > vg_free:
6116
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6117
                                 " vg %s: required %d MiB, available %d MiB" %
6118
                                 (node, vg, requested, vg_free),
6119
                                 errors.ECODE_NORES)
6120

    
6121

    
6122
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6123
  """Checks if nodes have enough physical CPUs
6124

6125
  This function checks whether all given nodes have the needed number of
  physical CPUs. If any node has fewer CPUs, or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6129

6130
  @type lu: C{LogicalUnit}
6131
  @param lu: a logical unit from which we get configuration data
6132
  @type nodenames: C{list}
6133
  @param nodenames: the list of node names to check
6134
  @type requested: C{int}
6135
  @param requested: the minimum acceptable number of physical CPUs
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for CPU information
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6137
      or we cannot check the node
6138

6139
  """
6140
  nodeinfo = lu.rpc.call_node_info(nodenames, None, [hypervisor_name])
6141
  for node in nodenames:
6142
    info = nodeinfo[node]
6143
    info.Raise("Cannot get current information from node %s" % node,
6144
               prereq=True, ecode=errors.ECODE_ENVIRON)
6145
    (_, _, (hv_info, )) = info.payload
6146
    num_cpus = hv_info.get("cpu_total", None)
6147
    if not isinstance(num_cpus, int):
6148
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6149
                                 " on node %s, result was '%s'" %
6150
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6151
    if requested > num_cpus:
6152
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6153
                                 "required" % (node, num_cpus, requested),
6154
                                 errors.ECODE_NORES)
6155

    
6156

    
6157
class LUInstanceStartup(LogicalUnit):
6158
  """Starts an instance.
6159

6160
  """
6161
  HPATH = "instance-start"
6162
  HTYPE = constants.HTYPE_INSTANCE
6163
  REQ_BGL = False
6164

    
6165
  def CheckArguments(self):
6166
    # extra beparams
6167
    if self.op.beparams:
6168
      # fill the beparams dict
6169
      objects.UpgradeBeParams(self.op.beparams)
6170
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6171

    
6172
  def ExpandNames(self):
6173
    self._ExpandAndLockInstance()
6174

    
6175
  def BuildHooksEnv(self):
6176
    """Build hooks env.
6177

6178
    This runs on master, primary and secondary nodes of the instance.
6179

6180
    """
6181
    env = {
6182
      "FORCE": self.op.force,
6183
      }
6184

    
6185
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6186

    
6187
    return env
6188

    
6189
  def BuildHooksNodes(self):
6190
    """Build hooks nodes.
6191

6192
    """
6193
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6194
    return (nl, nl)
6195

    
6196
  def CheckPrereq(self):
6197
    """Check prerequisites.
6198

6199
    This checks that the instance is in the cluster.
6200

6201
    """
6202
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6203
    assert self.instance is not None, \
6204
      "Cannot retrieve locked instance %s" % self.op.instance_name
6205

    
6206
    # extra hvparams
6207
    if self.op.hvparams:
6208
      # check hypervisor parameter syntax (locally)
6209
      cluster = self.cfg.GetClusterInfo()
6210
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6211
      filled_hvp = cluster.FillHV(instance)
6212
      filled_hvp.update(self.op.hvparams)
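      # filled_hvp is now the instance's effective hypervisor parameters
      # with the one-off overrides from this startup request applied on top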
6213
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6214
      hv_type.CheckParameterSyntax(filled_hvp)
6215
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6216

    
6217
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6218

    
6219
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6220

    
6221
    if self.primary_offline and self.op.ignore_offline_nodes:
6222
      self.proc.LogWarning("Ignoring offline primary node")
6223

    
6224
      if self.op.hvparams or self.op.beparams:
6225
        self.proc.LogWarning("Overridden parameters are ignored")
6226
    else:
6227
      _CheckNodeOnline(self, instance.primary_node)
6228

    
6229
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6230

    
6231
      # check bridges existence
6232
      _CheckInstanceBridgesExist(self, instance)
6233

    
6234
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6235
                                                instance.name,
6236
                                                instance.hypervisor)
6237
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6238
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6239
      if not remote_info.payload: # not running already
6240
        _CheckNodeFreeMemory(self, instance.primary_node,
6241
                             "starting instance %s" % instance.name,
6242
                             bep[constants.BE_MAXMEM], instance.hypervisor)
6243

    
6244
  def Exec(self, feedback_fn):
6245
    """Start the instance.
6246

6247
    """
6248
    instance = self.instance
6249
    force = self.op.force
6250

    
6251
    if not self.op.no_remember:
6252
      self.cfg.MarkInstanceUp(instance.name)
6253

    
6254
    if self.primary_offline:
6255
      assert self.op.ignore_offline_nodes
6256
      self.proc.LogInfo("Primary node offline, marked instance as started")
6257
    else:
6258
      node_current = instance.primary_node
6259

    
6260
      _StartInstanceDisks(self, instance, force)
6261

    
6262
      result = \
6263
        self.rpc.call_instance_start(node_current,
6264
                                     (instance, self.op.hvparams,
6265
                                      self.op.beparams),
6266
                                     self.op.startup_paused)
6267
      msg = result.fail_msg
6268
      if msg:
6269
        _ShutdownInstanceDisks(self, instance)
6270
        raise errors.OpExecError("Could not start instance: %s" % msg)
6271

    
6272

    
6273
class LUInstanceReboot(LogicalUnit):
6274
  """Reboot an instance.
6275

6276
  """
6277
  HPATH = "instance-reboot"
6278
  HTYPE = constants.HTYPE_INSTANCE
6279
  REQ_BGL = False
6280

    
6281
  def ExpandNames(self):
6282
    self._ExpandAndLockInstance()
6283

    
6284
  def BuildHooksEnv(self):
6285
    """Build hooks env.
6286

6287
    This runs on master, primary and secondary nodes of the instance.
6288

6289
    """
6290
    env = {
6291
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6292
      "REBOOT_TYPE": self.op.reboot_type,
6293
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6294
      }
6295

    
6296
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6297

    
6298
    return env
6299

    
6300
  def BuildHooksNodes(self):
6301
    """Build hooks nodes.
6302

6303
    """
6304
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6305
    return (nl, nl)
6306

    
6307
  def CheckPrereq(self):
6308
    """Check prerequisites.
6309

6310
    This checks that the instance is in the cluster.
6311

6312
    """
6313
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6314
    assert self.instance is not None, \
6315
      "Cannot retrieve locked instance %s" % self.op.instance_name
6316
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6317
    _CheckNodeOnline(self, instance.primary_node)
6318

    
6319
    # check bridges existence
6320
    _CheckInstanceBridgesExist(self, instance)
6321

    
6322
  def Exec(self, feedback_fn):
6323
    """Reboot the instance.
6324

6325
    """
6326
    instance = self.instance
6327
    ignore_secondaries = self.op.ignore_secondaries
6328
    reboot_type = self.op.reboot_type
6329

    
6330
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6331
                                              instance.name,
6332
                                              instance.hypervisor)
6333
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6334
    instance_running = bool(remote_info.payload)
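    # (an empty payload from instance_info means the instance is not
    # running on its primary node)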
6335

    
6336
    node_current = instance.primary_node
6337

    
6338
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6339
                                            constants.INSTANCE_REBOOT_HARD]:
6340
      for disk in instance.disks:
6341
        self.cfg.SetDiskID(disk, node_current)
6342
      result = self.rpc.call_instance_reboot(node_current, instance,
6343
                                             reboot_type,
6344
                                             self.op.shutdown_timeout)
6345
      result.Raise("Could not reboot instance")
6346
    else:
6347
      if instance_running:
6348
        result = self.rpc.call_instance_shutdown(node_current, instance,
6349
                                                 self.op.shutdown_timeout)
6350
        result.Raise("Could not shutdown instance for full reboot")
6351
        _ShutdownInstanceDisks(self, instance)
6352
      else:
6353
        self.LogInfo("Instance %s was already stopped, starting now",
6354
                     instance.name)
6355
      _StartInstanceDisks(self, instance, ignore_secondaries)
6356
      result = self.rpc.call_instance_start(node_current,
6357
                                            (instance, None, None), False)
6358
      msg = result.fail_msg
6359
      if msg:
6360
        _ShutdownInstanceDisks(self, instance)
6361
        raise errors.OpExecError("Could not start instance for"
6362
                                 " full reboot: %s" % msg)
6363

    
6364
    self.cfg.MarkInstanceUp(instance.name)
6365

    
6366

    
6367
class LUInstanceShutdown(LogicalUnit):
6368
  """Shutdown an instance.
6369

6370
  """
6371
  HPATH = "instance-stop"
6372
  HTYPE = constants.HTYPE_INSTANCE
6373
  REQ_BGL = False
6374

    
6375
  def ExpandNames(self):
6376
    self._ExpandAndLockInstance()
6377

    
6378
  def BuildHooksEnv(self):
6379
    """Build hooks env.
6380

6381
    This runs on master, primary and secondary nodes of the instance.
6382

6383
    """
6384
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6385
    env["TIMEOUT"] = self.op.timeout
6386
    return env
6387

    
6388
  def BuildHooksNodes(self):
6389
    """Build hooks nodes.
6390

6391
    """
6392
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6393
    return (nl, nl)
6394

    
6395
  def CheckPrereq(self):
6396
    """Check prerequisites.
6397

6398
    This checks that the instance is in the cluster.
6399

6400
    """
6401
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6402
    assert self.instance is not None, \
6403
      "Cannot retrieve locked instance %s" % self.op.instance_name
6404

    
6405
    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6406

    
6407
    self.primary_offline = \
6408
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6409

    
6410
    if self.primary_offline and self.op.ignore_offline_nodes:
6411
      self.proc.LogWarning("Ignoring offline primary node")
6412
    else:
6413
      _CheckNodeOnline(self, self.instance.primary_node)
6414

    
6415
  def Exec(self, feedback_fn):
6416
    """Shutdown the instance.
6417

6418
    """
6419
    instance = self.instance
6420
    node_current = instance.primary_node
6421
    timeout = self.op.timeout
6422

    
6423
    if not self.op.no_remember:
6424
      self.cfg.MarkInstanceDown(instance.name)
6425

    
6426
    if self.primary_offline:
6427
      assert self.op.ignore_offline_nodes
6428
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6429
    else:
6430
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6431
      msg = result.fail_msg
6432
      if msg:
6433
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6434

    
6435
      _ShutdownInstanceDisks(self, instance)
6436

    
6437

    
6438
class LUInstanceReinstall(LogicalUnit):
6439
  """Reinstall an instance.
6440

6441
  """
6442
  HPATH = "instance-reinstall"
6443
  HTYPE = constants.HTYPE_INSTANCE
6444
  REQ_BGL = False
6445

    
6446
  def ExpandNames(self):
6447
    self._ExpandAndLockInstance()
6448

    
6449
  def BuildHooksEnv(self):
6450
    """Build hooks env.
6451

6452
    This runs on master, primary and secondary nodes of the instance.
6453

6454
    """
6455
    return _BuildInstanceHookEnvByObject(self, self.instance)
6456

    
6457
  def BuildHooksNodes(self):
6458
    """Build hooks nodes.
6459

6460
    """
6461
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6462
    return (nl, nl)
6463

    
6464
  def CheckPrereq(self):
6465
    """Check prerequisites.
6466

6467
    This checks that the instance is in the cluster and is not running.
6468

6469
    """
6470
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6471
    assert instance is not None, \
6472
      "Cannot retrieve locked instance %s" % self.op.instance_name
6473
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6474
                     " offline, cannot reinstall")
6475
    for node in instance.secondary_nodes:
6476
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6477
                       " cannot reinstall")
6478

    
6479
    if instance.disk_template == constants.DT_DISKLESS:
6480
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6481
                                 self.op.instance_name,
6482
                                 errors.ECODE_INVAL)
6483
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6484

    
6485
    if self.op.os_type is not None:
6486
      # OS verification
6487
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6488
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6489
      instance_os = self.op.os_type
6490
    else:
6491
      instance_os = instance.os
6492

    
6493
    nodelist = list(instance.all_nodes)
6494

    
6495
    if self.op.osparams:
6496
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6497
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6498
      self.os_inst = i_osdict # the new dict (without defaults)
6499
    else:
6500
      self.os_inst = None
6501

    
6502
    self.instance = instance
6503

    
6504
  def Exec(self, feedback_fn):
6505
    """Reinstall the instance.
6506

6507
    """
6508
    inst = self.instance
6509

    
6510
    if self.op.os_type is not None:
6511
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6512
      inst.os = self.op.os_type
6513
      # Write to configuration
6514
      self.cfg.Update(inst, feedback_fn)
6515

    
6516
    _StartInstanceDisks(self, inst, None)
6517
    try:
6518
      feedback_fn("Running the instance OS create scripts...")
6519
      # FIXME: pass debug option from opcode to backend
6520
      result = self.rpc.call_instance_os_add(inst.primary_node,
6521
                                             (inst, self.os_inst), True,
6522
                                             self.op.debug_level)
6523
      result.Raise("Could not install OS for instance %s on node %s" %
6524
                   (inst.name, inst.primary_node))
6525
    finally:
6526
      _ShutdownInstanceDisks(self, inst)
6527

    
6528

    
6529
class LUInstanceRecreateDisks(LogicalUnit):
6530
  """Recreate an instance's missing disks.
6531

6532
  """
6533
  HPATH = "instance-recreate-disks"
6534
  HTYPE = constants.HTYPE_INSTANCE
6535
  REQ_BGL = False
6536

    
6537
  def CheckArguments(self):
6538
    # normalise the disk list
6539
    self.op.disks = sorted(frozenset(self.op.disks))
6540

    
6541
  def ExpandNames(self):
6542
    self._ExpandAndLockInstance()
6543
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6544
    if self.op.nodes:
6545
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6546
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6547
    else:
6548
      self.needed_locks[locking.LEVEL_NODE] = []
6549

    
6550
  def DeclareLocks(self, level):
6551
    if level == locking.LEVEL_NODE:
6552
      # if we replace the nodes, we only need to lock the old primary,
6553
      # otherwise we need to lock all nodes for disk re-creation
6554
      primary_only = bool(self.op.nodes)
6555
      self._LockInstancesNodes(primary_only=primary_only)
6556
    elif level == locking.LEVEL_NODE_RES:
6557
      # Copy node locks
6558
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6559
        self.needed_locks[locking.LEVEL_NODE][:]
6560

    
6561
  def BuildHooksEnv(self):
6562
    """Build hooks env.
6563

6564
    This runs on master, primary and secondary nodes of the instance.
6565

6566
    """
6567
    return _BuildInstanceHookEnvByObject(self, self.instance)
6568

    
6569
  def BuildHooksNodes(self):
6570
    """Build hooks nodes.
6571

6572
    """
6573
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6574
    return (nl, nl)
6575

    
6576
  def CheckPrereq(self):
6577
    """Check prerequisites.
6578

6579
    This checks that the instance is in the cluster and is not running.
6580

6581
    """
6582
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6583
    assert instance is not None, \
6584
      "Cannot retrieve locked instance %s" % self.op.instance_name
6585
    if self.op.nodes:
6586
      if len(self.op.nodes) != len(instance.all_nodes):
6587
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6588
                                   " %d replacement nodes were specified" %
6589
                                   (instance.name, len(instance.all_nodes),
6590
                                    len(self.op.nodes)),
6591
                                   errors.ECODE_INVAL)
6592
      assert instance.disk_template != constants.DT_DRBD8 or \
6593
          len(self.op.nodes) == 2
6594
      assert instance.disk_template != constants.DT_PLAIN or \
6595
          len(self.op.nodes) == 1
6596
      primary_node = self.op.nodes[0]
6597
    else:
6598
      primary_node = instance.primary_node
6599
    _CheckNodeOnline(self, primary_node)
6600

    
6601
    if instance.disk_template == constants.DT_DISKLESS:
6602
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6603
                                 self.op.instance_name, errors.ECODE_INVAL)
6604
    # if we replace nodes *and* the old primary is offline, we don't
6605
    # check
6606
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6607
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6608
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6609
    if not (self.op.nodes and old_pnode.offline):
6610
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6611
                          msg="cannot recreate disks")
6612

    
6613
    if not self.op.disks:
6614
      self.op.disks = range(len(instance.disks))
6615
    else:
6616
      for idx in self.op.disks:
6617
        if idx >= len(instance.disks):
6618
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6619
                                     errors.ECODE_INVAL)
6620
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6621
      raise errors.OpPrereqError("Can't recreate disks partially and"
6622
                                 " change the nodes at the same time",
6623
                                 errors.ECODE_INVAL)
6624
    self.instance = instance
6625

    
6626
  def Exec(self, feedback_fn):
6627
    """Recreate the disks.
6628

6629
    """
6630
    instance = self.instance
6631

    
6632
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6633
            self.owned_locks(locking.LEVEL_NODE_RES))
6634

    
6635
    to_skip = []
6636
    mods = [] # keeps track of needed logical_id changes
6637

    
6638
    for idx, disk in enumerate(instance.disks):
6639
      if idx not in self.op.disks: # disk idx has not been passed in
6640
        to_skip.append(idx)
6641
        continue
6642
      # update secondaries for disks, if needed
6643
      if self.op.nodes:
6644
        if disk.dev_type == constants.LD_DRBD8:
6645
          # need to update the nodes and minors
6646
          assert len(self.op.nodes) == 2
6647
          assert len(disk.logical_id) == 6 # otherwise disk internals
6648
                                           # have changed
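          # the DRBD8 logical_id layout used below is
          # (node_a, node_b, port, minor_a, minor_b, secret)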
6649
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6650
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6651
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6652
                    new_minors[0], new_minors[1], old_secret)
6653
          assert len(disk.logical_id) == len(new_id)
6654
          mods.append((idx, new_id))
6655

    
6656
    # now that we have passed all asserts above, we can apply the mods
6657
    # in a single run (to avoid partial changes)
6658
    for idx, new_id in mods:
6659
      instance.disks[idx].logical_id = new_id
6660

    
6661
    # change primary node, if needed
6662
    if self.op.nodes:
6663
      instance.primary_node = self.op.nodes[0]
6664
      self.LogWarning("Changing the instance's nodes, you will have to"
6665
                      " remove any disks left on the older nodes manually")
6666

    
6667
    if self.op.nodes:
6668
      self.cfg.Update(instance, feedback_fn)
6669

    
6670
    _CreateDisks(self, instance, to_skip=to_skip)
6671

    
6672

    
6673
class LUInstanceRename(LogicalUnit):
6674
  """Rename an instance.
6675

6676
  """
6677
  HPATH = "instance-rename"
6678
  HTYPE = constants.HTYPE_INSTANCE
6679

    
6680
  def CheckArguments(self):
6681
    """Check arguments.
6682

6683
    """
6684
    if self.op.ip_check and not self.op.name_check:
6685
      # TODO: make the ip check more flexible and not depend on the name check
6686
      raise errors.OpPrereqError("IP address check requires a name check",
6687
                                 errors.ECODE_INVAL)
6688

    
6689
  def BuildHooksEnv(self):
6690
    """Build hooks env.
6691

6692
    This runs on master, primary and secondary nodes of the instance.
6693

6694
    """
6695
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6696
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6697
    return env
6698

    
6699
  def BuildHooksNodes(self):
6700
    """Build hooks nodes.
6701

6702
    """
6703
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6704
    return (nl, nl)
6705

    
6706
  def CheckPrereq(self):
6707
    """Check prerequisites.
6708

6709
    This checks that the instance is in the cluster and is not running.
6710

6711
    """
6712
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6713
                                                self.op.instance_name)
6714
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6715
    assert instance is not None
6716
    _CheckNodeOnline(self, instance.primary_node)
6717
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6718
                        msg="cannot rename")
6719
    self.instance = instance
6720

    
6721
    new_name = self.op.new_name
6722
    if self.op.name_check:
6723
      hostname = netutils.GetHostname(name=new_name)
6724
      if hostname.name != new_name:
6725
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6726
                     hostname.name)
6727
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6728
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6729
                                    " same as given hostname '%s'") %
6730
                                    (hostname.name, self.op.new_name),
6731
                                    errors.ECODE_INVAL)
6732
      new_name = self.op.new_name = hostname.name
6733
      if (self.op.ip_check and
6734
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6735
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6736
                                   (hostname.ip, new_name),
6737
                                   errors.ECODE_NOTUNIQUE)
6738

    
6739
    instance_list = self.cfg.GetInstanceList()
6740
    if new_name in instance_list and new_name != instance.name:
6741
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6742
                                 new_name, errors.ECODE_EXISTS)
6743

    
6744
  def Exec(self, feedback_fn):
6745
    """Rename the instance.
6746

6747
    """
6748
    inst = self.instance
6749
    old_name = inst.name
6750

    
6751
    rename_file_storage = False
6752
    if (inst.disk_template in constants.DTS_FILEBASED and
6753
        self.op.new_name != inst.name):
6754
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6755
      rename_file_storage = True
6756

    
6757
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6758
    # Change the instance lock. This is definitely safe while we hold the BGL.
6759
    # Otherwise the new lock would have to be added in acquired mode.
6760
    assert self.REQ_BGL
6761
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6762
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6763

    
6764
    # re-read the instance from the configuration after rename
6765
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6766

    
6767
    if rename_file_storage:
6768
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6769
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6770
                                                     old_file_storage_dir,
6771
                                                     new_file_storage_dir)
6772
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6773
                   " (but the instance has been renamed in Ganeti)" %
6774
                   (inst.primary_node, old_file_storage_dir,
6775
                    new_file_storage_dir))
6776

    
6777
    _StartInstanceDisks(self, inst, None)
6778
    try:
6779
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6780
                                                 old_name, self.op.debug_level)
6781
      msg = result.fail_msg
6782
      if msg:
6783
        msg = ("Could not run OS rename script for instance %s on node %s"
6784
               " (but the instance has been renamed in Ganeti): %s" %
6785
               (inst.name, inst.primary_node, msg))
6786
        self.proc.LogWarning(msg)
6787
    finally:
6788
      _ShutdownInstanceDisks(self, inst)
6789

    
6790
    return inst.name
6791

    
6792

    
6793
class LUInstanceRemove(LogicalUnit):
6794
  """Remove an instance.
6795

6796
  """
6797
  HPATH = "instance-remove"
6798
  HTYPE = constants.HTYPE_INSTANCE
6799
  REQ_BGL = False
6800

    
6801
  def ExpandNames(self):
6802
    self._ExpandAndLockInstance()
6803
    self.needed_locks[locking.LEVEL_NODE] = []
6804
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6805
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6806

    
6807
  def DeclareLocks(self, level):
6808
    if level == locking.LEVEL_NODE:
6809
      self._LockInstancesNodes()
6810
    elif level == locking.LEVEL_NODE_RES:
6811
      # Copy node locks
6812
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6813
        self.needed_locks[locking.LEVEL_NODE][:]
6814

    
6815
  def BuildHooksEnv(self):
6816
    """Build hooks env.
6817

6818
    This runs on master, primary and secondary nodes of the instance.
6819

6820
    """
6821
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6822
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6823
    return env
6824

    
6825
  def BuildHooksNodes(self):
6826
    """Build hooks nodes.
6827

6828
    """
6829
    nl = [self.cfg.GetMasterNode()]
6830
    nl_post = list(self.instance.all_nodes) + nl
6831
    return (nl, nl_post)
6832

    
6833
  def CheckPrereq(self):
6834
    """Check prerequisites.
6835

6836
    This checks that the instance is in the cluster.
6837

6838
    """
6839
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6840
    assert self.instance is not None, \
6841
      "Cannot retrieve locked instance %s" % self.op.instance_name
6842

    
6843
  def Exec(self, feedback_fn):
6844
    """Remove the instance.
6845

6846
    """
6847
    instance = self.instance
6848
    logging.info("Shutting down instance %s on node %s",
6849
                 instance.name, instance.primary_node)
6850

    
6851
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6852
                                             self.op.shutdown_timeout)
6853
    msg = result.fail_msg
6854
    if msg:
6855
      if self.op.ignore_failures:
6856
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6857
      else:
6858
        raise errors.OpExecError("Could not shutdown instance %s on"
6859
                                 " node %s: %s" %
6860
                                 (instance.name, instance.primary_node, msg))
6861

    
6862
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6863
            self.owned_locks(locking.LEVEL_NODE_RES))
6864
    assert not (set(instance.all_nodes) -
6865
                self.owned_locks(locking.LEVEL_NODE)), \
6866
      "Not owning correct locks"
6867

    
6868
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6869

    
6870

    
6871
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6872
  """Utility function to remove an instance.
6873

6874
  """
6875
  logging.info("Removing block devices for instance %s", instance.name)
6876

    
6877
  if not _RemoveDisks(lu, instance):
6878
    if not ignore_failures:
6879
      raise errors.OpExecError("Can't remove instance's disks")
6880
    feedback_fn("Warning: can't remove instance's disks")
6881

    
6882
  logging.info("Removing instance %s out of cluster config", instance.name)
6883

    
6884
  lu.cfg.RemoveInstance(instance.name)
6885

    
6886
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6887
    "Instance lock removal conflict"
6888

    
6889
  # Remove lock for the instance
6890
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6891

    
6892

    
6893
class LUInstanceQuery(NoHooksLU):
6894
  """Logical unit for querying instances.
6895

6896
  """
6897
  # pylint: disable=W0142
6898
  REQ_BGL = False
6899

    
6900
  def CheckArguments(self):
6901
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6902
                             self.op.output_fields, self.op.use_locking)
6903

    
6904
  def ExpandNames(self):
6905
    self.iq.ExpandNames(self)
6906

    
6907
  def DeclareLocks(self, level):
6908
    self.iq.DeclareLocks(self, level)
6909

    
6910
  def Exec(self, feedback_fn):
6911
    return self.iq.OldStyleQuery(self)
6912

    
6913

    
6914
class LUInstanceFailover(LogicalUnit):
6915
  """Failover an instance.
6916

6917
  """
6918
  HPATH = "instance-failover"
6919
  HTYPE = constants.HTYPE_INSTANCE
6920
  REQ_BGL = False
6921

    
6922
  def CheckArguments(self):
6923
    """Check the arguments.
6924

6925
    """
6926
    self.iallocator = getattr(self.op, "iallocator", None)
6927
    self.target_node = getattr(self.op, "target_node", None)
6928

    
6929
  def ExpandNames(self):
6930
    self._ExpandAndLockInstance()
6931

    
6932
    if self.op.target_node is not None:
6933
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6934

    
6935
    self.needed_locks[locking.LEVEL_NODE] = []
6936
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6937

    
6938
    ignore_consistency = self.op.ignore_consistency
6939
    shutdown_timeout = self.op.shutdown_timeout
6940
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6941
                                       cleanup=False,
6942
                                       failover=True,
6943
                                       ignore_consistency=ignore_consistency,
6944
                                       shutdown_timeout=shutdown_timeout)
6945
    self.tasklets = [self._migrater]
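    # (failover is implemented as a TLMigrateInstance tasklet running with
    # failover=True, so the actual work happens in the tasklet, not here)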
6946

    
6947
  def DeclareLocks(self, level):
6948
    if level == locking.LEVEL_NODE:
6949
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6950
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6951
        if self.op.target_node is None:
6952
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6953
        else:
6954
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6955
                                                   self.op.target_node]
6956
        del self.recalculate_locks[locking.LEVEL_NODE]
6957
      else:
6958
        self._LockInstancesNodes()
6959

    
6960
  def BuildHooksEnv(self):
6961
    """Build hooks env.
6962

6963
    This runs on master, primary and secondary nodes of the instance.
6964

6965
    """
6966
    instance = self._migrater.instance
6967
    source_node = instance.primary_node
6968
    target_node = self.op.target_node
6969
    env = {
6970
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6971
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6972
      "OLD_PRIMARY": source_node,
6973
      "NEW_PRIMARY": target_node,
6974
      }
6975

    
6976
    if instance.disk_template in constants.DTS_INT_MIRROR:
6977
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6978
      env["NEW_SECONDARY"] = source_node
6979
    else:
6980
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6981

    
6982
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6983

    
6984
    return env
6985

    
6986
  def BuildHooksNodes(self):
6987
    """Build hooks nodes.
6988

6989
    """
6990
    instance = self._migrater.instance
6991
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6992
    return (nl, nl + [instance.primary_node])
6993

    
6994

    
6995
class LUInstanceMigrate(LogicalUnit):
6996
  """Migrate an instance.
6997

6998
  This is migration without shutting down the instance, in contrast to
  failover, which requires a shutdown.
7000

7001
  """
7002
  HPATH = "instance-migrate"
7003
  HTYPE = constants.HTYPE_INSTANCE
7004
  REQ_BGL = False
7005

    
7006
  def ExpandNames(self):
7007
    self._ExpandAndLockInstance()
7008

    
7009
    if self.op.target_node is not None:
7010
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7011

    
7012
    self.needed_locks[locking.LEVEL_NODE] = []
7013
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7014

    
7015
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
7016
                                       cleanup=self.op.cleanup,
7017
                                       failover=False,
7018
                                       fallback=self.op.allow_failover)
7019
    self.tasklets = [self._migrater]
7020

    
7021
  def DeclareLocks(self, level):
7022
    if level == locking.LEVEL_NODE:
7023
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
7024
      if instance.disk_template in constants.DTS_EXT_MIRROR:
7025
        if self.op.target_node is None:
7026
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7027
        else:
7028
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
7029
                                                   self.op.target_node]
7030
        del self.recalculate_locks[locking.LEVEL_NODE]
7031
      else:
7032
        self._LockInstancesNodes()
7033

    
7034
  def BuildHooksEnv(self):
7035
    """Build hooks env.
7036

7037
    This runs on master, primary and secondary nodes of the instance.
7038

7039
    """
7040
    instance = self._migrater.instance
7041
    source_node = instance.primary_node
7042
    target_node = self.op.target_node
7043
    env = _BuildInstanceHookEnvByObject(self, instance)
7044
    env.update({
7045
      "MIGRATE_LIVE": self._migrater.live,
7046
      "MIGRATE_CLEANUP": self.op.cleanup,
7047
      "OLD_PRIMARY": source_node,
7048
      "NEW_PRIMARY": target_node,
7049
      })
7050

    
7051
    if instance.disk_template in constants.DTS_INT_MIRROR:
7052
      env["OLD_SECONDARY"] = target_node
7053
      env["NEW_SECONDARY"] = source_node
7054
    else:
7055
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7056

    
7057
    return env
7058

    
7059
  def BuildHooksNodes(self):
7060
    """Build hooks nodes.
7061

7062
    """
7063
    instance = self._migrater.instance
7064
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7065
    return (nl, nl + [instance.primary_node])
7066

    
7067

    
7068
class LUInstanceMove(LogicalUnit):
7069
  """Move an instance by data-copying.
7070

7071
  """
7072
  HPATH = "instance-move"
7073
  HTYPE = constants.HTYPE_INSTANCE
7074
  REQ_BGL = False
7075

    
7076
  def ExpandNames(self):
7077
    self._ExpandAndLockInstance()
7078
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7079
    self.op.target_node = target_node
7080
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
7081
    self.needed_locks[locking.LEVEL_NODE_RES] = []
7082
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7083

    
7084
  def DeclareLocks(self, level):
7085
    if level == locking.LEVEL_NODE:
7086
      self._LockInstancesNodes(primary_only=True)
7087
    elif level == locking.LEVEL_NODE_RES:
7088
      # Copy node locks
7089
      self.needed_locks[locking.LEVEL_NODE_RES] = \
7090
        self.needed_locks[locking.LEVEL_NODE][:]
7091

    
7092
  def BuildHooksEnv(self):
7093
    """Build hooks env.
7094

7095
    This runs on master, primary and secondary nodes of the instance.
7096

7097
    """
7098
    env = {
7099
      "TARGET_NODE": self.op.target_node,
7100
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7101
      }
7102
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7103
    return env
7104

    
7105
  def BuildHooksNodes(self):
7106
    """Build hooks nodes.
7107

7108
    """
7109
    nl = [
7110
      self.cfg.GetMasterNode(),
7111
      self.instance.primary_node,
7112
      self.op.target_node,
7113
      ]
7114
    return (nl, nl)
7115

    
7116
  def CheckPrereq(self):
7117
    """Check prerequisites.
7118

7119
    This checks that the instance is in the cluster.
7120

7121
    """
7122
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7123
    assert self.instance is not None, \
7124
      "Cannot retrieve locked instance %s" % self.op.instance_name
7125

    
7126
    node = self.cfg.GetNodeInfo(self.op.target_node)
7127
    assert node is not None, \
7128
      "Cannot retrieve locked node %s" % self.op.target_node
7129

    
7130
    self.target_node = target_node = node.name
7131

    
7132
    if target_node == instance.primary_node:
7133
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7134
                                 (instance.name, target_node),
7135
                                 errors.ECODE_STATE)
7136

    
7137
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7138

    
7139
    for idx, dsk in enumerate(instance.disks):
7140
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7141
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7142
                                   " cannot copy" % idx, errors.ECODE_STATE)
7143

    
7144
    _CheckNodeOnline(self, target_node)
7145
    _CheckNodeNotDrained(self, target_node)
7146
    _CheckNodeVmCapable(self, target_node)
7147

    
7148
    if instance.admin_state == constants.ADMINST_UP:
7149
      # check memory requirements on the target node
7150
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7151
                           instance.name, bep[constants.BE_MAXMEM],
7152
                           instance.hypervisor)
7153
    else:
7154
      self.LogInfo("Not checking memory on the secondary node as"
7155
                   " instance will not be started")
7156

    
7157
    # check bridge existence
7158
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7159

    
7160
  def Exec(self, feedback_fn):
7161
    """Move an instance.
7162

7163
    The move is done by shutting it down on its present node, copying
7164
    the data over (slow) and starting it on the new node.
7165

7166
    """
7167
    instance = self.instance
7168

    
7169
    source_node = instance.primary_node
7170
    target_node = self.target_node
7171

    
7172
    self.LogInfo("Shutting down instance %s on source node %s",
7173
                 instance.name, source_node)
7174

    
7175
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7176
            self.owned_locks(locking.LEVEL_NODE_RES))
7177

    
7178
    result = self.rpc.call_instance_shutdown(source_node, instance,
7179
                                             self.op.shutdown_timeout)
7180
    msg = result.fail_msg
7181
    if msg:
7182
      if self.op.ignore_consistency:
7183
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7184
                             " Proceeding anyway. Please make sure node"
7185
                             " %s is down. Error details: %s",
7186
                             instance.name, source_node, source_node, msg)
7187
      else:
7188
        raise errors.OpExecError("Could not shutdown instance %s on"
7189
                                 " node %s: %s" %
7190
                                 (instance.name, source_node, msg))
7191

    
7192
    # create the target disks
7193
    try:
7194
      _CreateDisks(self, instance, target_node=target_node)
7195
    except errors.OpExecError:
7196
      self.LogWarning("Device creation failed, reverting...")
7197
      try:
7198
        _RemoveDisks(self, instance, target_node=target_node)
7199
      finally:
7200
        self.cfg.ReleaseDRBDMinors(instance.name)
7201
        raise
7202

    
7203
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7204

    
7205
    errs = []
7206
    # activate, get path, copy the data over
7207
    for idx, disk in enumerate(instance.disks):
7208
      self.LogInfo("Copying data for disk %d", idx)
7209
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7210
                                               instance.name, True, idx)
7211
      if result.fail_msg:
7212
        self.LogWarning("Can't assemble newly created disk %d: %s",
7213
                        idx, result.fail_msg)
7214
        errs.append(result.fail_msg)
7215
        break
7216
      dev_path = result.payload
7217
      result = self.rpc.call_blockdev_export(source_node, disk,
7218
                                             target_node, dev_path,
7219
                                             cluster_name)
7220
      if result.fail_msg:
7221
        self.LogWarning("Can't copy data over for disk %d: %s",
7222
                        idx, result.fail_msg)
7223
        errs.append(result.fail_msg)
7224
        break
7225

    
7226
    if errs:
7227
      self.LogWarning("Some disks failed to copy, aborting")
7228
      try:
7229
        _RemoveDisks(self, instance, target_node=target_node)
7230
      finally:
7231
        self.cfg.ReleaseDRBDMinors(instance.name)
7232
        raise errors.OpExecError("Errors during disk copy: %s" %
7233
                                 (",".join(errs),))
7234

    
7235
    instance.primary_node = target_node
7236
    self.cfg.Update(instance, feedback_fn)
7237

    
7238
    self.LogInfo("Removing the disks on the original node")
7239
    _RemoveDisks(self, instance, target_node=source_node)
7240

    
7241
    # Only start the instance if it's marked as up
7242
    if instance.admin_state == constants.ADMINST_UP:
7243
      self.LogInfo("Starting instance %s on node %s",
7244
                   instance.name, target_node)
7245

    
7246
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7247
                                           ignore_secondaries=True)
7248
      if not disks_ok:
7249
        _ShutdownInstanceDisks(self, instance)
7250
        raise errors.OpExecError("Can't activate the instance's disks")
7251

    
7252
      result = self.rpc.call_instance_start(target_node,
7253
                                            (instance, None, None), False)
7254
      msg = result.fail_msg
7255
      if msg:
7256
        _ShutdownInstanceDisks(self, instance)
7257
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7258
                                 (instance.name, target_node, msg))
7259

    
7260

    
7261
class LUNodeMigrate(LogicalUnit):
7262
  """Migrate all instances from a node.
7263

7264
  """
7265
  HPATH = "node-migrate"
7266
  HTYPE = constants.HTYPE_NODE
7267
  REQ_BGL = False
7268

    
7269
  def CheckArguments(self):
7270
    pass
7271

    
7272
  def ExpandNames(self):
7273
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7274

    
7275
    self.share_locks = _ShareAll()
7276
    self.needed_locks = {
7277
      locking.LEVEL_NODE: [self.op.node_name],
7278
      }
7279

    
7280
  def BuildHooksEnv(self):
7281
    """Build hooks env.
7282

7283
    This runs on the master, the primary and all the secondaries.
7284

7285
    """
7286
    return {
7287
      "NODE_NAME": self.op.node_name,
7288
      }
7289

    
7290
  def BuildHooksNodes(self):
7291
    """Build hooks nodes.
7292

7293
    """
7294
    nl = [self.cfg.GetMasterNode()]
7295
    return (nl, nl)
7296

    
7297
  def CheckPrereq(self):
7298
    pass
7299

    
7300
  def Exec(self, feedback_fn):
7301
    # Prepare jobs for migration instances
7302
    jobs = [
7303
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7304
                                 mode=self.op.mode,
7305
                                 live=self.op.live,
7306
                                 iallocator=self.op.iallocator,
7307
                                 target_node=self.op.target_node)]
7308
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7309
      ]
7310

    
7311
    # TODO: Run iallocator in this opcode and pass correct placement options to
7312
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7313
    # running the iallocator and the actual migration, a good consistency model
7314
    # will have to be found.
7315

    
7316
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7317
            frozenset([self.op.node_name]))
7318

    
7319
    return ResultWithJobs(jobs)
7320

    
7321

    
7322
class TLMigrateInstance(Tasklet):
7323
  """Tasklet class for instance migration.
7324

7325
  @type live: boolean
7326
  @ivar live: whether the migration will be done live or non-live;
7327
      this variable is initialized only after CheckPrereq has run
7328
  @type cleanup: boolean
7329
  @ivar cleanup: Whether we are cleaning up after a failed migration
7330
  @type iallocator: string
7331
  @ivar iallocator: The iallocator used to determine target_node
7332
  @type target_node: string
7333
  @ivar target_node: If given, the target_node to reallocate the instance to
7334
  @type failover: boolean
7335
  @ivar failover: Whether operation results in failover or migration
7336
  @type fallback: boolean
7337
  @ivar fallback: Whether fallback to failover is allowed if migration is not
7338
                  possible
7339
  @type ignore_consistency: boolean
7340
  @ivar ignore_consistency: Whether we should ignore consistency between source
7341
                            and target node
7342
  @type shutdown_timeout: int
7343
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown
7344

7345
  """
7346
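  # Typical use (as in LUInstanceFailover and LUInstanceMigrate above): the
  # LU builds this tasklet in ExpandNames and registers it via
  # self.tasklets = [TLMigrateInstance(...)]; its CheckPrereq and Exec are
  # then run as part of the enclosing LU.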

    
7347
  # Constants
7348
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7349
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7350

    
7351
  def __init__(self, lu, instance_name, cleanup=False,
7352
               failover=False, fallback=False,
7353
               ignore_consistency=False,
7354
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7355
    """Initializes this class.
7356

7357
    """
7358
    Tasklet.__init__(self, lu)
7359

    
7360
    # Parameters
7361
    self.instance_name = instance_name
7362
    self.cleanup = cleanup
7363
    self.live = False # will be overridden later
7364
    self.failover = failover
7365
    self.fallback = fallback
7366
    self.ignore_consistency = ignore_consistency
7367
    self.shutdown_timeout = shutdown_timeout
7368

    
7369
  def CheckPrereq(self):
7370
    """Check prerequisites.
7371

7372
    This checks that the instance is in the cluster.
7373

7374
    """
7375
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7376
    instance = self.cfg.GetInstanceInfo(instance_name)
7377
    assert instance is not None
7378
    self.instance = instance
7379

    
7380
    if (not self.cleanup and
7381
        instance.admin_state != constants.ADMINST_UP and
7382
        not self.failover and self.fallback):
7383
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7384
                      " switching to failover")
7385
      self.failover = True
7386

    
7387
    if instance.disk_template not in constants.DTS_MIRRORED:
7388
      if self.failover:
7389
        text = "failovers"
7390
      else:
7391
        text = "migrations"
7392
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7393
                                 " %s" % (instance.disk_template, text),
7394
                                 errors.ECODE_STATE)
7395

    
7396
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7397
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7398

    
7399
      if self.lu.op.iallocator:
7400
        self._RunAllocator()
7401
      else:
7402
        # We set self.target_node here as it is required by
7403
        # BuildHooksEnv
7404
        self.target_node = self.lu.op.target_node
7405

    
7406
      # self.target_node is already populated, either directly or by the
7407
      # iallocator run
7408
      target_node = self.target_node
7409
      if self.target_node == instance.primary_node:
7410
        raise errors.OpPrereqError("Cannot migrate instance %s"
7411
                                   " to its primary (%s)" %
7412
                                   (instance.name, instance.primary_node))
7413

    
7414
      if len(self.lu.tasklets) == 1:
7415
        # It is safe to release locks only when we're the only tasklet
7416
        # in the LU
7417
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7418
                      keep=[instance.primary_node, self.target_node])
7419

    
7420
    else:
7421
      secondary_nodes = instance.secondary_nodes
7422
      if not secondary_nodes:
7423
        raise errors.ConfigurationError("No secondary node but using"
7424
                                        " %s disk template" %
7425
                                        instance.disk_template)
7426
      target_node = secondary_nodes[0]
7427
      if self.lu.op.iallocator or (self.lu.op.target_node and
7428
                                   self.lu.op.target_node != target_node):
7429
        if self.failover:
7430
          text = "failed over"
7431
        else:
7432
          text = "migrated"
7433
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7434
                                   " be %s to arbitrary nodes"
7435
                                   " (neither an iallocator nor a target"
7436
                                   " node can be passed)" %
7437
                                   (instance.disk_template, text),
7438
                                   errors.ECODE_INVAL)
7439

    
7440
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7441

    
7442
    # check memory requirements on the secondary node
7443
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7444
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7445
                           instance.name, i_be[constants.BE_MAXMEM],
7446
                           instance.hypervisor)
7447
    else:
7448
      self.lu.LogInfo("Not checking memory on the secondary node as"
7449
                      " instance will not be started")
7450

    
7451
    # check bridge existence
7452
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7453

    
7454
    if not self.cleanup:
7455
      _CheckNodeNotDrained(self.lu, target_node)
7456
      if not self.failover:
7457
        result = self.rpc.call_instance_migratable(instance.primary_node,
7458
                                                   instance)
7459
        if result.fail_msg and self.fallback:
7460
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7461
                          " failover")
7462
          self.failover = True
7463
        else:
7464
          result.Raise("Can't migrate, please use failover",
7465
                       prereq=True, ecode=errors.ECODE_STATE)
7466

    
7467
    assert not (self.failover and self.cleanup)
7468

    
7469
    if not self.failover:
7470
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7471
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7472
                                   " parameters are accepted",
7473
                                   errors.ECODE_INVAL)
7474
      if self.lu.op.live is not None:
7475
        if self.lu.op.live:
7476
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7477
        else:
7478
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7479
        # reset the 'live' parameter to None so that repeated
7480
        # invocations of CheckPrereq do not raise an exception
7481
        self.lu.op.live = None
7482
      elif self.lu.op.mode is None:
7483
        # read the default value from the hypervisor
7484
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7485
                                                skip_globals=False)
7486
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7487

    
7488
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7489
    else:
7490
      # Failover is never live
7491
      self.live = False
7492

    
7493
  def _RunAllocator(self):
7494
    """Run the allocator based on input opcode.
7495

7496
    """
7497
    ial = IAllocator(self.cfg, self.rpc,
7498
                     mode=constants.IALLOCATOR_MODE_RELOC,
7499
                     name=self.instance_name,
7500
                     # TODO See why hail breaks with a single node below
7501
                     relocate_from=[self.instance.primary_node,
7502
                                    self.instance.primary_node],
7503
                     )
7504

    
7505
    ial.Run(self.lu.op.iallocator)
7506

    
7507
    if not ial.success:
7508
      raise errors.OpPrereqError("Can't compute nodes using"
7509
                                 " iallocator '%s': %s" %
7510
                                 (self.lu.op.iallocator, ial.info),
7511
                                 errors.ECODE_NORES)
7512
    if len(ial.result) != ial.required_nodes:
7513
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7514
                                 " of nodes (%s), required %s" %
7515
                                 (self.lu.op.iallocator, len(ial.result),
7516
                                  ial.required_nodes), errors.ECODE_FAULT)
7517
    self.target_node = ial.result[0]
7518
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7519
                 self.instance_name, self.lu.op.iallocator,
7520
                 utils.CommaJoin(ial.result))
7521

    
7522
  def _WaitUntilSync(self):
7523
    """Poll with custom rpc for disk sync.
7524

7525
    This uses our own step-based rpc call.
7526

7527
    """
7528
    self.feedback_fn("* wait until resync is done")
7529
    all_done = False
7530
    while not all_done:
7531
      all_done = True
7532
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7533
                                            self.nodes_ip,
7534
                                            self.instance.disks)
7535
      min_percent = 100
7536
      for node, nres in result.items():
7537
        nres.Raise("Cannot resync disks on node %s" % node)
7538
        node_done, node_percent = nres.payload
7539
        all_done = all_done and node_done
7540
        if node_percent is not None:
7541
          min_percent = min(min_percent, node_percent)
7542
      if not all_done:
7543
        if min_percent < 100:
7544
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7545
        time.sleep(2)
7546

    
7547
  def _EnsureSecondary(self, node):
7548
    """Demote a node to secondary.
7549

7550
    """
7551
    self.feedback_fn("* switching node %s to secondary mode" % node)
7552

    
7553
    for dev in self.instance.disks:
7554
      self.cfg.SetDiskID(dev, node)
7555

    
7556
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7557
                                          self.instance.disks)
7558
    result.Raise("Cannot change disk to secondary on node %s" % node)
7559

    
7560
  def _GoStandalone(self):
7561
    """Disconnect from the network.
7562

7563
    """
7564
    self.feedback_fn("* changing into standalone mode")
7565
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7566
                                               self.instance.disks)
7567
    for node, nres in result.items():
7568
      nres.Raise("Cannot disconnect disks node %s" % node)
7569

    
7570
  def _GoReconnect(self, multimaster):
7571
    """Reconnect to the network.
7572

7573
    """
7574
    if multimaster:
7575
      msg = "dual-master"
7576
    else:
7577
      msg = "single-master"
7578
    self.feedback_fn("* changing disks into %s mode" % msg)
7579
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7580
                                           self.instance.disks,
7581
                                           self.instance.name, multimaster)
7582
    for node, nres in result.items():
7583
      nres.Raise("Cannot change disks config on node %s" % node)
7584

    
7585
  def _ExecCleanup(self):
7586
    """Try to cleanup after a failed migration.
7587

7588
    The cleanup is done by:
7589
      - check that the instance is running only on one node
7590
        (and update the config if needed)
7591
      - change disks on its secondary node to secondary
7592
      - wait until disks are fully synchronized
7593
      - disconnect from the network
7594
      - change disks into single-master mode
7595
      - wait again until disks are fully synchronized
7596

7597
    """
7598
    instance = self.instance
7599
    target_node = self.target_node
7600
    source_node = self.source_node
7601

    
7602
    # check running on only one node
7603
    self.feedback_fn("* checking where the instance actually runs"
7604
                     " (if this hangs, the hypervisor might be in"
7605
                     " a bad state)")
7606
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7607
    for node, result in ins_l.items():
7608
      result.Raise("Can't contact node %s" % node)
7609

    
7610
    runningon_source = instance.name in ins_l[source_node].payload
7611
    runningon_target = instance.name in ins_l[target_node].payload
7612

    
7613
    if runningon_source and runningon_target:
7614
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7615
                               " or the hypervisor is confused; you will have"
7616
                               " to ensure manually that it runs only on one"
7617
                               " and restart this operation")
7618

    
7619
    if not (runningon_source or runningon_target):
7620
      raise errors.OpExecError("Instance does not seem to be running at all;"
7621
                               " in this case it's safer to repair by"
7622
                               " running 'gnt-instance stop' to ensure disk"
7623
                               " shutdown, and then restarting it")
7624

    
7625
    if runningon_target:
7626
      # the migration has actually succeeded, we need to update the config
7627
      self.feedback_fn("* instance running on secondary node (%s),"
7628
                       " updating config" % target_node)
7629
      instance.primary_node = target_node
7630
      self.cfg.Update(instance, self.feedback_fn)
7631
      demoted_node = source_node
7632
    else:
7633
      self.feedback_fn("* instance confirmed to be running on its"
7634
                       " primary node (%s)" % source_node)
7635
      demoted_node = target_node
7636

    
7637
    if instance.disk_template in constants.DTS_INT_MIRROR:
7638
      self._EnsureSecondary(demoted_node)
7639
      try:
7640
        self._WaitUntilSync()
7641
      except errors.OpExecError:
7642
        # we ignore errors here, since if the device is standalone, it
7643
        # won't be able to sync
7644
        pass
7645
      self._GoStandalone()
7646
      self._GoReconnect(False)
7647
      self._WaitUntilSync()
7648

    
7649
    self.feedback_fn("* done")
7650

    
7651
  def _RevertDiskStatus(self):
7652
    """Try to revert the disk status after a failed migration.
7653

7654
    """
7655
    target_node = self.target_node
7656
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7657
      return
7658

    
7659
    try:
7660
      self._EnsureSecondary(target_node)
7661
      self._GoStandalone()
7662
      self._GoReconnect(False)
7663
      self._WaitUntilSync()
7664
    except errors.OpExecError, err:
7665
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7666
                         " please try to recover the instance manually;"
7667
                         " error '%s'" % str(err))
7668

    
7669
  def _AbortMigration(self):
7670
    """Call the hypervisor code to abort a started migration.
7671

7672
    """
7673
    instance = self.instance
7674
    target_node = self.target_node
7675
    source_node = self.source_node
7676
    migration_info = self.migration_info
7677

    
7678
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7679
                                                                 instance,
7680
                                                                 migration_info,
7681
                                                                 False)
7682
    abort_msg = abort_result.fail_msg
7683
    if abort_msg:
7684
      logging.error("Aborting migration failed on target node %s: %s",
7685
                    target_node, abort_msg)
7686
      # Don't raise an exception here, as we still have to try to revert the
7687
      # disk status, even if this step failed.
7688

    
7689
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7690
        instance, False, self.live)
7691
    abort_msg = abort_result.fail_msg
7692
    if abort_msg:
7693
      logging.error("Aborting migration failed on source node %s: %s",
7694
                    source_node, abort_msg)
7695

    
7696
  def _ExecMigration(self):
7697
    """Migrate an instance.
7698

7699
    The migrate is done by:
7700
      - change the disks into dual-master mode
7701
      - wait until disks are fully synchronized again
7702
      - migrate the instance
7703
      - change disks on the new secondary node (the old primary) to secondary
7704
      - wait until disks are fully synchronized
7705
      - change disks into single-master mode
7706

7707
    """
7708
    instance = self.instance
7709
    target_node = self.target_node
7710
    source_node = self.source_node
7711

    
7712
    # Check for hypervisor version mismatch and warn the user.
7713
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7714
                                       None, [self.instance.hypervisor])
7715
    for ninfo in nodeinfo.values():
7716
      ninfo.Raise("Unable to retrieve node information from node '%s'" %
7717
                  ninfo.node)
7718
    (_, _, (src_info, )) = nodeinfo[source_node].payload
7719
    (_, _, (dst_info, )) = nodeinfo[target_node].payload
7720

    
7721
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info) and
7722
        (constants.HV_NODEINFO_KEY_VERSION in dst_info)):
7723
      src_version = src_info[constants.HV_NODEINFO_KEY_VERSION]
7724
      dst_version = dst_info[constants.HV_NODEINFO_KEY_VERSION]
7725
      if src_version != dst_version:
7726
        self.feedback_fn("* warning: hypervisor version mismatch between"
7727
                         " source (%s) and target (%s) node" %
7728
                         (src_version, dst_version))
7729

    
7730
    self.feedback_fn("* checking disk consistency between source and target")
7731
    for dev in instance.disks:
7732
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7733
        raise errors.OpExecError("Disk %s is degraded or not fully"
7734
                                 " synchronized on target node,"
7735
                                 " aborting migration" % dev.iv_name)
7736

    
7737
    # First get the migration information from the remote node
7738
    result = self.rpc.call_migration_info(source_node, instance)
7739
    msg = result.fail_msg
7740
    if msg:
7741
      log_err = ("Failed fetching source migration information from %s: %s" %
7742
                 (source_node, msg))
7743
      logging.error(log_err)
7744
      raise errors.OpExecError(log_err)
7745

    
7746
    self.migration_info = migration_info = result.payload
7747

    
7748
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7749
      # Then switch the disks to master/master mode
7750
      self._EnsureSecondary(target_node)
7751
      self._GoStandalone()
7752
      self._GoReconnect(True)
7753
      self._WaitUntilSync()
7754

    
7755
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7756
    result = self.rpc.call_accept_instance(target_node,
7757
                                           instance,
7758
                                           migration_info,
7759
                                           self.nodes_ip[target_node])
7760

    
7761
    msg = result.fail_msg
7762
    if msg:
7763
      logging.error("Instance pre-migration failed, trying to revert"
7764
                    " disk status: %s", msg)
7765
      self.feedback_fn("Pre-migration failed, aborting")
7766
      self._AbortMigration()
7767
      self._RevertDiskStatus()
7768
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7769
                               (instance.name, msg))
7770

    
7771
    self.feedback_fn("* migrating instance to %s" % target_node)
7772
    result = self.rpc.call_instance_migrate(source_node, instance,
7773
                                            self.nodes_ip[target_node],
7774
                                            self.live)
7775
    msg = result.fail_msg
7776
    if msg:
7777
      logging.error("Instance migration failed, trying to revert"
7778
                    " disk status: %s", msg)
7779
      self.feedback_fn("Migration failed, aborting")
7780
      self._AbortMigration()
7781
      self._RevertDiskStatus()
7782
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7783
                               (instance.name, msg))
7784

    
7785
    self.feedback_fn("* starting memory transfer")
7786
    last_feedback = time.time()
7787
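    # Poll the source node every _MIGRATION_POLL_INTERVAL seconds and emit a
    # progress message at most every _MIGRATION_FEEDBACK_INTERVAL seconds,
    # until the hypervisor no longer reports the migration as active.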
    while True:
7788
      result = self.rpc.call_instance_get_migration_status(source_node,
7789
                                                           instance)
7790
      msg = result.fail_msg
7791
      ms = result.payload   # MigrationStatus instance
7792
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7793
        logging.error("Instance migration failed, trying to revert"
7794
                      " disk status: %s", msg)
7795
        self.feedback_fn("Migration failed, aborting")
7796
        self._AbortMigration()
7797
        self._RevertDiskStatus()
7798
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7799
                                 (instance.name, msg))
7800

    
7801
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7802
        self.feedback_fn("* memory transfer complete")
7803
        break
7804

    
7805
      if (utils.TimeoutExpired(last_feedback,
7806
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7807
          ms.transferred_ram is not None):
7808
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7809
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7810
        last_feedback = time.time()
7811

    
7812
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7813

    
7814
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7815
                                                           instance,
7816
                                                           True,
7817
                                                           self.live)
7818
    msg = result.fail_msg
7819
    if msg:
7820
      logging.error("Instance migration succeeded, but finalization failed"
7821
                    " on the source node: %s", msg)
7822
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7823
                               msg)
7824

    
7825
    instance.primary_node = target_node
7826

    
7827
    # distribute new instance config to the other nodes
7828
    self.cfg.Update(instance, self.feedback_fn)
7829

    
7830
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7831
                                                           instance,
7832
                                                           migration_info,
7833
                                                           True)
7834
    msg = result.fail_msg
7835
    if msg:
7836
      logging.error("Instance migration succeeded, but finalization failed"
7837
                    " on the target node: %s", msg)
7838
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7839
                               msg)
7840

    
7841
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7842
      self._EnsureSecondary(source_node)
7843
      self._WaitUntilSync()
7844
      self._GoStandalone()
7845
      self._GoReconnect(False)
7846
      self._WaitUntilSync()
7847

    
7848
    self.feedback_fn("* done")
7849

    
7850
  def _ExecFailover(self):
7851
    """Failover an instance.
7852

7853
    The failover is done by shutting it down on its present node and
7854
    starting it on the secondary.
7855

7856
    """
7857
    instance = self.instance
7858
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7859

    
7860
    source_node = instance.primary_node
7861
    target_node = self.target_node
7862

    
7863
    if instance.admin_state == constants.ADMINST_UP:
7864
      self.feedback_fn("* checking disk consistency between source and target")
7865
      for dev in instance.disks:
7866
        # for drbd, these are drbd over lvm
7867
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7868
          if primary_node.offline:
7869
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7870
                             " target node %s" %
7871
                             (primary_node.name, dev.iv_name, target_node))
7872
          elif not self.ignore_consistency:
7873
            raise errors.OpExecError("Disk %s is degraded on target node,"
7874
                                     " aborting failover" % dev.iv_name)
7875
    else:
7876
      self.feedback_fn("* not checking disk consistency as instance is not"
7877
                       " running")
7878

    
7879
    self.feedback_fn("* shutting down instance on source node")
7880
    logging.info("Shutting down instance %s on node %s",
7881
                 instance.name, source_node)
7882

    
7883
    result = self.rpc.call_instance_shutdown(source_node, instance,
7884
                                             self.shutdown_timeout)
7885
    msg = result.fail_msg
7886
    if msg:
7887
      if self.ignore_consistency or primary_node.offline:
7888
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7889
                           " proceeding anyway; please make sure node"
7890
                           " %s is down; error details: %s",
7891
                           instance.name, source_node, source_node, msg)
7892
      else:
7893
        raise errors.OpExecError("Could not shutdown instance %s on"
7894
                                 " node %s: %s" %
7895
                                 (instance.name, source_node, msg))
7896

    
7897
    self.feedback_fn("* deactivating the instance's disks on source node")
7898
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7899
      raise errors.OpExecError("Can't shut down the instance's disks")
7900

    
7901
    instance.primary_node = target_node
7902
    # distribute new instance config to the other nodes
7903
    self.cfg.Update(instance, self.feedback_fn)
7904

    
7905
    # Only start the instance if it's marked as up
7906
    if instance.admin_state == constants.ADMINST_UP:
7907
      self.feedback_fn("* activating the instance's disks on target node %s" %
7908
                       target_node)
7909
      logging.info("Starting instance %s on node %s",
7910
                   instance.name, target_node)
7911

    
7912
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7913
                                           ignore_secondaries=True)
7914
      if not disks_ok:
7915
        _ShutdownInstanceDisks(self.lu, instance)
7916
        raise errors.OpExecError("Can't activate the instance's disks")
7917

    
7918
      self.feedback_fn("* starting the instance on the target node %s" %
7919
                       target_node)
7920
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7921
                                            False)
7922
      msg = result.fail_msg
7923
      if msg:
7924
        _ShutdownInstanceDisks(self.lu, instance)
7925
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7926
                                 (instance.name, target_node, msg))
7927

    
7928
  def Exec(self, feedback_fn):
7929
    """Perform the migration.
7930

7931
    """
7932
    self.feedback_fn = feedback_fn
7933
    self.source_node = self.instance.primary_node
7934

    
7935
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7936
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7937
      self.target_node = self.instance.secondary_nodes[0]
7938
      # Otherwise self.target_node has been populated either
7939
      # directly, or through an iallocator.
7940

    
7941
    self.all_nodes = [self.source_node, self.target_node]
7942
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7943
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7944

    
7945
    if self.failover:
7946
      feedback_fn("Failover instance %s" % self.instance.name)
7947
      self._ExecFailover()
7948
    else:
7949
      feedback_fn("Migrating instance %s" % self.instance.name)
7950

    
7951
      if self.cleanup:
7952
        return self._ExecCleanup()
7953
      else:
7954
        return self._ExecMigration()
7955

    
7956

    
7957
def _CreateBlockDev(lu, node, instance, device, force_create,
7958
                    info, force_open):
7959
  """Create a tree of block devices on a given node.
7960

7961
  If this device type has to be created on secondaries, create it and
7962
  all its children.
7963

7964
  If not, just recurse to children keeping the same 'force' value.
7965

7966
  @param lu: the lu on whose behalf we execute
7967
  @param node: the node on which to create the device
7968
  @type instance: L{objects.Instance}
7969
  @param instance: the instance which owns the device
7970
  @type device: L{objects.Disk}
7971
  @param device: the device to create
7972
  @type force_create: boolean
7973
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device that has
7975
      CreateOnSecondary() attribute
7976
  @param info: the extra 'metadata' we should attach to the device
7977
      (this will be represented as a LVM tag)
7978
  @type force_open: boolean
7979
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7983

7984
  """
7985
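  # E.g. for a drbd8 disk the two LV children are created first by the
  # recursion below and only then is the DRBD device itself created on top
  # of them, so children always exist before their parent.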
  if device.CreateOnSecondary():
7986
    force_create = True
7987

    
7988
  if device.children:
7989
    for child in device.children:
7990
      _CreateBlockDev(lu, node, instance, child, force_create,
7991
                      info, force_open)
7992

    
7993
  if not force_create:
7994
    return
7995

    
7996
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7997

    
7998

    
7999
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
8000
  """Create a single block device on a given node.
8001

8002
  This will not recurse over children of the device, so they must be
8003
  created in advance.
8004

8005
  @param lu: the lu on whose behalf we execute
8006
  @param node: the node on which to create the device
8007
  @type instance: L{objects.Instance}
8008
  @param instance: the instance which owns the device
8009
  @type device: L{objects.Disk}
8010
  @param device: the device to create
8011
  @param info: the extra 'metadata' we should attach to the device
8012
      (this will be represented as a LVM tag)
8013
  @type force_open: boolean
8014
  @param force_open: this parameter will be passes to the
8015
      L{backend.BlockdevCreate} function where it specifies
8016
      whether we run on primary or not, and it affects both
8017
      the child assembly and the device own Open() execution
8018

8019
  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload
8027

    
8028

    
8029
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
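  # E.g. (illustrative) exts of [".disk0", ".disk1"] yield names such as
  # ["<uuid>.disk0", "<uuid>.disk1"], with a fresh unique ID generated for
  # every element.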
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
8040

    
8041

    
8042
def _ComputeLDParams(disk_template, disk_params):
8043
  """Computes Logical Disk parameters from Disk Template parameters.
8044

8045
  @type disk_template: string
8046
  @param disk_template: disk template, one of L{constants.DISK_TEMPLATES}
8047
  @type disk_params: dict
8048
  @param disk_params: disk template parameters; dict(template_name -> parameters)
8049
  @rtype: list(dict)
8050
  @return: a list of dicts, one for each node of the disk hierarchy. Each dict
8051
    contains the LD parameters of the node. The tree is flattened in-order.
8052

8053
  """
8054
  if disk_template not in constants.DISK_TEMPLATES:
8055
    raise errors.ProgrammerError("Unknown disk template %s" % disk_template)
8056

    
8057
  result = list()
8058
  dt_params = disk_params[disk_template]
8059
  if disk_template == constants.DT_DRBD8:
8060
    params = {
8061
      constants.RESYNC_RATE: dt_params[constants.DRBD_RESYNC_RATE]
8062
      }
8063

    
8064
    drbd_params = \
8065
      objects.FillDict(constants.DISK_LD_DEFAULTS[constants.LD_DRBD8], params)
8066

    
8067
    result.append(drbd_params)
8068
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_LV])
8069
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_LV])
8070

    
8071
  elif (disk_template == constants.DT_FILE or
8072
        disk_template == constants.DT_SHARED_FILE):
8073
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_FILE])
8074
  elif disk_template == constants.DT_PLAIN:
8075
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_LV])
8076
  elif disk_template == constants.DT_BLOCK:
8077
    result.append(constants.DISK_LD_DEFAULTS[constants.LD_BLOCKDEV])
8078

    
8079
  return result
8080

    
8081

    
8082
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8083
                         iv_name, p_minor, s_minor, drbd_params, data_params,
8084
                         meta_params):
8085
  """Generate a drbd8 device complete with its children.
8086

8087
  """
8088
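  # The resulting tree (see the objects.Disk constructions below) is one
  # LD_DRBD8 device of the requested size with two LD_LV children: the data
  # volume (full size) and a DRBD_META_SIZE metadata volume.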
  assert len(vgnames) == len(names) == 2
8089
  port = lu.cfg.AllocatePort()
8090
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8091

    
8092
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8093
                          logical_id=(vgnames[0], names[0]),
8094
                          params=data_params)
8095
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8096
                          logical_id=(vgnames[1], names[1]),
8097
                          params=meta_params)
8098
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8099
                          logical_id=(primary, secondary, port,
8100
                                      p_minor, s_minor,
8101
                                      shared_secret),
8102
                          children=[dev_data, dev_meta],
8103
                          iv_name=iv_name, params=drbd_params)
8104
  return drbd_dev
8105

    
8106

    
8107
def _GenerateDiskTemplate(lu, template_name,
8108
                          instance_name, primary_node,
8109
                          secondary_nodes, disk_info,
8110
                          file_storage_dir, file_driver,
8111
                          base_index, feedback_fn, disk_params):
8112
  """Generate the entire disk layout for a given template type.
8113

8114
  """
8115
  #TODO: compute space requirements
8116

    
8117
  vgname = lu.cfg.GetVGName()
8118
  disk_count = len(disk_info)
8119
  disks = []
8120
  ld_params = _ComputeLDParams(template_name, disk_params)
8121
  if template_name == constants.DT_DISKLESS:
8122
    pass
8123
  elif template_name == constants.DT_PLAIN:
8124
    if len(secondary_nodes) != 0:
8125
      raise errors.ProgrammerError("Wrong template configuration")
8126

    
8127
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8128
                                      for i in range(disk_count)])
8129
    for idx, disk in enumerate(disk_info):
8130
      disk_index = idx + base_index
8131
      vg = disk.get(constants.IDISK_VG, vgname)
8132
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8133
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
8134
                              size=disk[constants.IDISK_SIZE],
8135
                              logical_id=(vg, names[idx]),
8136
                              iv_name="disk/%d" % disk_index,
8137
                              mode=disk[constants.IDISK_MODE],
8138
                              params=ld_params[0])
8139
      disks.append(disk_dev)
8140
  elif template_name == constants.DT_DRBD8:
8141
    drbd_params, data_params, meta_params = ld_params
8142
    if len(secondary_nodes) != 1:
8143
      raise errors.ProgrammerError("Wrong template configuration")
8144
    remote_node = secondary_nodes[0]
8145
    minors = lu.cfg.AllocateDRBDMinor(
8146
      [primary_node, remote_node] * len(disk_info), instance_name)
8147

    
8148
    names = []
8149
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8150
                                               for i in range(disk_count)]):
8151
      names.append(lv_prefix + "_data")
8152
      names.append(lv_prefix + "_meta")
8153
    for idx, disk in enumerate(disk_info):
8154
      disk_index = idx + base_index
8155
      data_vg = disk.get(constants.IDISK_VG, vgname)
8156
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8157
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8158
                                      disk[constants.IDISK_SIZE],
8159
                                      [data_vg, meta_vg],
8160
                                      names[idx * 2:idx * 2 + 2],
8161
                                      "disk/%d" % disk_index,
8162
                                      minors[idx * 2], minors[idx * 2 + 1],
8163
                                      drbd_params, data_params, meta_params)
8164
      disk_dev.mode = disk[constants.IDISK_MODE]
8165
      disks.append(disk_dev)
8166
  elif template_name == constants.DT_FILE:
8167
    if len(secondary_nodes) != 0:
8168
      raise errors.ProgrammerError("Wrong template configuration")
8169

    
8170
    opcodes.RequireFileStorage()
8171

    
8172
    for idx, disk in enumerate(disk_info):
8173
      disk_index = idx + base_index
8174
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8175
                              size=disk[constants.IDISK_SIZE],
8176
                              iv_name="disk/%d" % disk_index,
8177
                              logical_id=(file_driver,
8178
                                          "%s/disk%d" % (file_storage_dir,
8179
                                                         disk_index)),
8180
                              mode=disk[constants.IDISK_MODE],
8181
                              params=ld_params[0])
8182
      disks.append(disk_dev)
8183
  elif template_name == constants.DT_SHARED_FILE:
8184
    if len(secondary_nodes) != 0:
8185
      raise errors.ProgrammerError("Wrong template configuration")
8186

    
8187
    opcodes.RequireSharedFileStorage()
8188

    
8189
    for idx, disk in enumerate(disk_info):
8190
      disk_index = idx + base_index
8191
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8192
                              size=disk[constants.IDISK_SIZE],
8193
                              iv_name="disk/%d" % disk_index,
8194
                              logical_id=(file_driver,
8195
                                          "%s/disk%d" % (file_storage_dir,
8196
                                                         disk_index)),
8197
                              mode=disk[constants.IDISK_MODE],
8198
                              params=ld_params[0])
8199
      disks.append(disk_dev)
8200
  elif template_name == constants.DT_BLOCK:
8201
    if len(secondary_nodes) != 0:
8202
      raise errors.ProgrammerError("Wrong template configuration")
8203

    
8204
    for idx, disk in enumerate(disk_info):
8205
      disk_index = idx + base_index
8206
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8207
                              size=disk[constants.IDISK_SIZE],
8208
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8209
                                          disk[constants.IDISK_ADOPT]),
8210
                              iv_name="disk/%d" % disk_index,
8211
                              mode=disk[constants.IDISK_MODE],
8212
                              params=ld_params[0])
8213
      disks.append(disk_dev)
8214

    
8215
  else:
8216
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8217
  return disks
8218

    
8219

    
8220
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
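  # E.g. (illustrative name) an instance called "inst1.example.com" yields
  # "originstname+inst1.example.com"; this text is attached to the instance's
  # disks, ending up as an LVM tag for LVM-backed volumes.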
  return "originstname+%s" % instance.name
8225

    
8226

    
8227
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
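  # Worked example (illustrative figures): 1024 MiB of a 4096 MiB total
  # written in 120 s gives avg_time = 120/1024 s per unit, so the ETA is
  # (4096 - 1024) * 120/1024 = 360 s.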
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
8238

    
8239

    
8240
def _WipeDisks(lu, instance):
  """Wipes instance disks.
8242

8243
  @type lu: L{LogicalUnit}
8244
  @param lu: the logical unit on whose behalf we execute
8245
  @type instance: L{objects.Instance}
8246
  @param instance: the instance whose disks we should create
8247
  @return: the success of the wipe
8248

8249
  """
8250
  node = instance.primary_node
8251

    
8252
  for device in instance.disks:
8253
    lu.cfg.SetDiskID(device, node)
8254

    
8255
  logging.info("Pause sync of instance %s disks", instance.name)
8256
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8257

    
8258
  for idx, success in enumerate(result.payload):
8259
    if not success:
8260
      logging.warn("pause-sync of instance %s for disks %d failed",
8261
                   instance.name, idx)
8262

    
8263
  try:
8264
    for idx, device in enumerate(instance.disks):
8265
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8266
      # MAX_WIPE_CHUNK at max
8267
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8268
                            constants.MIN_WIPE_CHUNK_PERCENT)
8269
      # we _must_ make this an int, otherwise rounding errors will
8270
      # occur
8271
      wipe_chunk_size = int(wipe_chunk_size)
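      # Illustrative example (actual constant values may differ): for a
      # 200000 MiB disk, MIN_WIPE_CHUNK_PERCENT = 10 and MAX_WIPE_CHUNK = 1024
      # give min(1024, 200000 / 100.0 * 10) = 1024 MiB per wipe call.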
8272

    
8273
      lu.LogInfo("* Wiping disk %d", idx)
8274
      logging.info("Wiping disk %d for instance %s, node %s using"
8275
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8276

    
8277
      offset = 0
8278
      size = device.size
8279
      last_output = 0
8280
      start_time = time.time()
8281

    
8282
      while offset < size:
8283
        wipe_size = min(wipe_chunk_size, size - offset)
8284
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8285
                      idx, offset, wipe_size)
8286
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8287
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8288
                     (idx, offset, wipe_size))
8289
        now = time.time()
8290
        offset += wipe_size
8291
        if now - last_output >= 60:
8292
          eta = _CalcEta(now - start_time, offset, size)
8293
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8294
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8295
          last_output = now
8296
  finally:
8297
    logging.info("Resume sync of instance %s disks", instance.name)
8298

    
8299
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8300

    
8301
    for idx, success in enumerate(result.payload):
8302
      if not success:
8303
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8304
                      " look at the status and troubleshoot the issue", idx)
8305
        logging.warn("resume-sync of instance %s for disks %d failed",
8306
                     instance.name, idx)
8307

    
8308

    
8309
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8310
  """Create all disks for an instance.
8311

8312
  This abstracts away some work from AddInstance.
8313

8314
  @type lu: L{LogicalUnit}
8315
  @param lu: the logical unit on whose behalf we execute
8316
  @type instance: L{objects.Instance}
8317
  @param instance: the instance whose disks we should create
8318
  @type to_skip: list
8319
  @param to_skip: list of indices to skip
8320
  @type target_node: string
8321
  @param target_node: if passed, overrides the target node for creation
8322
  @rtype: boolean
8323
  @return: the success of the creation
8324

8325
  """
8326
  info = _GetInstanceInfoText(instance)
8327
  if target_node is None:
8328
    pnode = instance.primary_node
8329
    all_nodes = instance.all_nodes
8330
  else:
8331
    pnode = target_node
8332
    all_nodes = [pnode]
8333

    
8334
  if instance.disk_template in constants.DTS_FILEBASED:
8335
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8336
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8337

    
8338
    result.Raise("Failed to create directory '%s' on"
8339
                 " node %s" % (file_storage_dir, pnode))
8340

    
8341
  # Note: this needs to be kept in sync with adding of disks in
8342
  # LUInstanceSetParams
8343
  for idx, device in enumerate(instance.disks):
8344
    if to_skip and idx in to_skip:
8345
      continue
8346
    logging.info("Creating volume %s for instance %s",
8347
                 device.iv_name, instance.name)
8348
    #HARDCODE
8349
    for node in all_nodes:
8350
      f_create = node == pnode
8351
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8352

    
8353

    
8354
def _RemoveDisks(lu, instance, target_node=None):
8355
  """Remove all disks for an instance.
8356

8357
  This abstracts away some work from `AddInstance()` and
8358
  `RemoveInstance()`. Note that in case some of the devices couldn't
8359
  be removed, the removal will continue with the other ones (compare
8360
  with `_CreateDisks()`).
8361

8362
  @type lu: L{LogicalUnit}
8363
  @param lu: the logical unit on whose behalf we execute
8364
  @type instance: L{objects.Instance}
8365
  @param instance: the instance whose disks we should remove
8366
  @type target_node: string
8367
  @param target_node: used to override the node on which to remove the disks
8368
  @rtype: boolean
8369
  @return: the success of the removal
8370

8371
  """
8372
  logging.info("Removing block devices for instance %s", instance.name)
8373

    
8374
  all_result = True
8375
  for device in instance.disks:
8376
    if target_node:
8377
      edata = [(target_node, device)]
8378
    else:
8379
      edata = device.ComputeNodeTree(instance.primary_node)
8380
    for node, disk in edata:
8381
      lu.cfg.SetDiskID(disk, node)
8382
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8383
      if msg:
8384
        lu.LogWarning("Could not remove block device %s on node %s,"
8385
                      " continuing anyway: %s", device.iv_name, node, msg)
8386
        all_result = False
8387

    
8388
    # if this is a DRBD disk, return its port to the pool
8389
    if device.dev_type in constants.LDS_DRBD:
8390
      tcp_port = device.logical_id[2]
8391
      lu.cfg.AddTcpUdpPort(tcp_port)
8392

    
8393
  if instance.disk_template == constants.DT_FILE:
8394
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8395
    if target_node:
8396
      tgt = target_node
8397
    else:
8398
      tgt = instance.primary_node
8399
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8400
    if result.fail_msg:
8401
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8402
                    file_storage_dir, tgt, result.fail_msg)
8403
      all_result = False
8404

    
8405
  return all_result
8406

    
8407

    
8408
def _ComputeDiskSizePerVG(disk_template, disks):
8409
  """Compute disk size requirements in the volume group
8410

8411
  """
8412
  def _compute(disks, payload):
8413
    """Universal algorithm.
8414

8415
    """
8416
    vgs = {}
8417
    for disk in disks:
8418
      vgs[disk[constants.IDISK_VG]] = \
8419
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload
8420

    
8421
    return vgs
8422

    
8423
  # Required free disk space as a function of disk and swap space
8424
  req_size_dict = {
8425
    constants.DT_DISKLESS: {},
8426
    constants.DT_PLAIN: _compute(disks, 0),
8427
    # 128 MB are added for drbd metadata for each disk
8428
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8429
    constants.DT_FILE: {},
8430
    constants.DT_SHARED_FILE: {},
8431
  }
8432

    
8433
  if disk_template not in req_size_dict:
8434
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8435
                                 " is unknown" % disk_template)
8436

    
8437
  return req_size_dict[disk_template]
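  # Illustrative example: for disks = [{IDISK_SIZE: 1024, IDISK_VG: "xenvg"},
  # {IDISK_SIZE: 512, IDISK_VG: "altvg"}] and disk_template == DT_DRBD8, the
  # result is {"xenvg": 1024 + DRBD_META_SIZE, "altvg": 512 + DRBD_META_SIZE},
  # i.e. each disk also accounts for its DRBD metadata device.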
8438

    
8439

    
8440
def _ComputeDiskSize(disk_template, disks):
8441
  """Compute disk size requirements in the volume group
8442

8443
  """
8444
  # Required free disk space as a function of disk and swap space
8445
  req_size_dict = {
8446
    constants.DT_DISKLESS: None,
8447
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8448
    # 128 MB are added for drbd metadata for each disk
8449
    constants.DT_DRBD8:
8450
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8451
    constants.DT_FILE: None,
8452
    constants.DT_SHARED_FILE: 0,
8453
    constants.DT_BLOCK: 0,
8454
  }
8455

    
8456
  if disk_template not in req_size_dict:
8457
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8458
                                 " is unknown" % disk_template)
8459

    
8460
  return req_size_dict[disk_template]
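  # Illustrative example: for disks = [{IDISK_SIZE: 1024}, {IDISK_SIZE: 512}]
  # this returns 1536 for DT_PLAIN and 1024 + 512 + 2 * DRBD_META_SIZE = 1792
  # for DT_DRBD8; templates that do not use the volume group return 0 or None.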
8461

    
8462

    
8463
def _FilterVmNodes(lu, nodenames):
8464
  """Filters out non-vm_capable nodes from a list.
8465

8466
  @type lu: L{LogicalUnit}
8467
  @param lu: the logical unit for which we check
8468
  @type nodenames: list
8469
  @param nodenames: the list of nodes on which we should check
8470
  @rtype: list
8471
  @return: the list of vm-capable nodes
8472

8473
  """
8474
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8475
  return [name for name in nodenames if name not in non_vm_nodes]
8476

    
8477

    
8478
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8479
  """Hypervisor parameter validation.
8480

8481
  This function abstracts the hypervisor parameter validation to be
8482
  used in both instance create and instance modify.
8483

8484
  @type lu: L{LogicalUnit}
8485
  @param lu: the logical unit for which we check
8486
  @type nodenames: list
8487
  @param nodenames: the list of nodes on which we should check
8488
  @type hvname: string
8489
  @param hvname: the name of the hypervisor we should use
8490
  @type hvparams: dict
8491
  @param hvparams: the parameters which we need to check
8492
  @raise errors.OpPrereqError: if the parameters are not valid
8493

8494
  """
8495
  nodenames = _FilterVmNodes(lu, nodenames)
8496

    
8497
  cluster = lu.cfg.GetClusterInfo()
8498
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8499

    
8500
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8501
  for node in nodenames:
8502
    info = hvinfo[node]
8503
    if info.offline:
8504
      continue
8505
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8506

    
8507

    
8508
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8509
  """OS parameters validation.
8510

8511
  @type lu: L{LogicalUnit}
8512
  @param lu: the logical unit for which we check
8513
  @type required: boolean
8514
  @param required: whether the validation should fail if the OS is not
8515
      found
8516
  @type nodenames: list
8517
  @param nodenames: the list of nodes on which we should check
8518
  @type osname: string
8519
  @param osname: the name of the OS we should use
8520
  @type osparams: dict
8521
  @param osparams: the parameters which we need to check
8522
  @raise errors.OpPrereqError: if the parameters are not valid
8523

8524
  """
8525
  nodenames = _FilterVmNodes(lu, nodenames)
8526
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8527
                                   [constants.OS_VALIDATE_PARAMETERS],
8528
                                   osparams)
8529
  for node, nres in result.items():
8530
    # we don't check for offline cases since this should be run only
8531
    # against the master node and/or an instance's nodes
8532
    nres.Raise("OS Parameters validation failed on node %s" % node)
8533
    if not nres.payload:
8534
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8535
                 osname, node)
8536

    
8537

    
8538
class LUInstanceCreate(LogicalUnit):
8539
  """Create an instance.
8540

8541
  """
8542
  HPATH = "instance-add"
8543
  HTYPE = constants.HTYPE_INSTANCE
8544
  REQ_BGL = False
8545

    
8546
  def CheckArguments(self):
8547
    """Check arguments.
8548

8549
    """
8550
    # do not require name_check to ease forward/backward compatibility
8551
    # for tools
8552
    if self.op.no_install and self.op.start:
8553
      self.LogInfo("No-installation mode selected, disabling startup")
8554
      self.op.start = False
8555
    # validate/normalize the instance name
8556
    self.op.instance_name = \
8557
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8558

    
8559
    if self.op.ip_check and not self.op.name_check:
8560
      # TODO: make the ip check more flexible and not depend on the name check
8561
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8562
                                 " check", errors.ECODE_INVAL)
8563

    
8564
    # check nics' parameter names
8565
    for nic in self.op.nics:
8566
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8567

    
8568
    # check disks. parameter names and consistent adopt/no-adopt strategy
8569
    has_adopt = has_no_adopt = False
8570
    for disk in self.op.disks:
8571
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8572
      if constants.IDISK_ADOPT in disk:
8573
        has_adopt = True
8574
      else:
8575
        has_no_adopt = True
8576
    if has_adopt and has_no_adopt:
8577
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8578
                                 errors.ECODE_INVAL)
8579
    if has_adopt:
8580
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8581
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8582
                                   " '%s' disk template" %
8583
                                   self.op.disk_template,
8584
                                   errors.ECODE_INVAL)
8585
      if self.op.iallocator is not None:
8586
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8587
                                   " iallocator script", errors.ECODE_INVAL)
8588
      if self.op.mode == constants.INSTANCE_IMPORT:
8589
        raise errors.OpPrereqError("Disk adoption not allowed for"
8590
                                   " instance import", errors.ECODE_INVAL)
8591
    else:
8592
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8593
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8594
                                   " but no 'adopt' parameter given" %
8595
                                   self.op.disk_template,
8596
                                   errors.ECODE_INVAL)
8597

    
8598
    self.adopt_disks = has_adopt
8599

    
8600
    # instance name verification
8601
    if self.op.name_check:
8602
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8603
      self.op.instance_name = self.hostname1.name
8604
      # used in CheckPrereq for ip ping check
8605
      self.check_ip = self.hostname1.ip
8606
    else:
8607
      self.check_ip = None
8608

    
8609
    # file storage checks
8610
    if (self.op.file_driver and
8611
        not self.op.file_driver in constants.FILE_DRIVER):
8612
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8613
                                 self.op.file_driver, errors.ECODE_INVAL)
8614

    
8615
    if self.op.disk_template == constants.DT_FILE:
8616
      opcodes.RequireFileStorage()
8617
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8618
      opcodes.RequireSharedFileStorage()
8619

    
8620
    ### Node/iallocator related checks
8621
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8622

    
8623
    if self.op.pnode is not None:
8624
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8625
        if self.op.snode is None:
8626
          raise errors.OpPrereqError("The networked disk templates need"
8627
                                     " a mirror node", errors.ECODE_INVAL)
8628
      elif self.op.snode:
8629
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8630
                        " template")
8631
        self.op.snode = None
8632

    
8633
    self._cds = _GetClusterDomainSecret()
8634

    
8635
    if self.op.mode == constants.INSTANCE_IMPORT:
8636
      # On import force_variant must be True, because if we forced it at
8637
      # initial install, our only chance when importing it back is that it
8638
      # works again!
8639
      self.op.force_variant = True
8640

    
8641
      if self.op.no_install:
8642
        self.LogInfo("No-installation mode has no effect during import")
8643

    
8644
    elif self.op.mode == constants.INSTANCE_CREATE:
8645
      if self.op.os_type is None:
8646
        raise errors.OpPrereqError("No guest OS specified",
8647
                                   errors.ECODE_INVAL)
8648
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8649
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8650
                                   " installation" % self.op.os_type,
8651
                                   errors.ECODE_STATE)
8652
      if self.op.disk_template is None:
8653
        raise errors.OpPrereqError("No disk template specified",
8654
                                   errors.ECODE_INVAL)
8655

    
8656
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8657
      # Check handshake to ensure both clusters have the same domain secret
8658
      src_handshake = self.op.source_handshake
8659
      if not src_handshake:
8660
        raise errors.OpPrereqError("Missing source handshake",
8661
                                   errors.ECODE_INVAL)
8662

    
8663
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8664
                                                           src_handshake)
8665
      if errmsg:
8666
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8667
                                   errors.ECODE_INVAL)
8668

    
8669
      # Load and check source CA
8670
      self.source_x509_ca_pem = self.op.source_x509_ca
8671
      if not self.source_x509_ca_pem:
8672
        raise errors.OpPrereqError("Missing source X509 CA",
8673
                                   errors.ECODE_INVAL)
8674

    
8675
      try:
8676
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8677
                                                    self._cds)
8678
      except OpenSSL.crypto.Error, err:
8679
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8680
                                   (err, ), errors.ECODE_INVAL)
8681

    
8682
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8683
      if errcode is not None:
8684
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8685
                                   errors.ECODE_INVAL)
8686

    
8687
      self.source_x509_ca = cert
8688

    
8689
      src_instance_name = self.op.source_instance_name
8690
      if not src_instance_name:
8691
        raise errors.OpPrereqError("Missing source instance name",
8692
                                   errors.ECODE_INVAL)
8693

    
8694
      self.source_instance_name = \
8695
          netutils.GetHostname(name=src_instance_name).name
8696

    
8697
    else:
8698
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8699
                                 self.op.mode, errors.ECODE_INVAL)
8700

    
8701
  def ExpandNames(self):
8702
    """ExpandNames for CreateInstance.
8703

8704
    Figure out the right locks for instance creation.
8705

8706
    """
8707
    self.needed_locks = {}
8708

    
8709
    instance_name = self.op.instance_name
8710
    # this is just a preventive check, but someone might still add this
8711
    # instance in the meantime, and creation will fail at lock-add time
8712
    if instance_name in self.cfg.GetInstanceList():
8713
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8714
                                 instance_name, errors.ECODE_EXISTS)
8715

    
8716
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8717

    
8718
    if self.op.iallocator:
8719
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8720
      # specifying a group on instance creation and then selecting nodes from
8721
      # that group
8722
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8723
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8724
    else:
8725
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8726
      nodelist = [self.op.pnode]
8727
      if self.op.snode is not None:
8728
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8729
        nodelist.append(self.op.snode)
8730
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8731
      # Lock resources of instance's primary and secondary nodes (copy to
8732
      # prevent accidental modification)
8733
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8734

    
8735
    # in case of import lock the source node too
8736
    if self.op.mode == constants.INSTANCE_IMPORT:
8737
      src_node = self.op.src_node
8738
      src_path = self.op.src_path
8739

    
8740
      if src_path is None:
8741
        self.op.src_path = src_path = self.op.instance_name
8742

    
8743
      if src_node is None:
8744
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8745
        self.op.src_node = None
8746
        if os.path.isabs(src_path):
8747
          raise errors.OpPrereqError("Importing an instance from a path"
8748
                                     " requires a source node option",
8749
                                     errors.ECODE_INVAL)
8750
      else:
8751
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8752
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8753
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8754
        if not os.path.isabs(src_path):
8755
          self.op.src_path = src_path = \
8756
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8757

    
8758
  def _RunAllocator(self):
8759
    """Run the allocator based on input opcode.
8760

8761
    """
8762
    nics = [n.ToDict() for n in self.nics]
8763
    ial = IAllocator(self.cfg, self.rpc,
8764
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8765
                     name=self.op.instance_name,
8766
                     disk_template=self.op.disk_template,
8767
                     tags=self.op.tags,
8768
                     os=self.op.os_type,
8769
                     vcpus=self.be_full[constants.BE_VCPUS],
8770
                     memory=self.be_full[constants.BE_MAXMEM],
8771
                     disks=self.disks,
8772
                     nics=nics,
8773
                     hypervisor=self.op.hypervisor,
8774
                     )
8775

    
8776
    ial.Run(self.op.iallocator)
8777

    
8778
    if not ial.success:
8779
      raise errors.OpPrereqError("Can't compute nodes using"
8780
                                 " iallocator '%s': %s" %
8781
                                 (self.op.iallocator, ial.info),
8782
                                 errors.ECODE_NORES)
8783
    if len(ial.result) != ial.required_nodes:
8784
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8785
                                 " of nodes (%s), required %s" %
8786
                                 (self.op.iallocator, len(ial.result),
8787
                                  ial.required_nodes), errors.ECODE_FAULT)
8788
    self.op.pnode = ial.result[0]
8789
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8790
                 self.op.instance_name, self.op.iallocator,
8791
                 utils.CommaJoin(ial.result))
8792
    if ial.required_nodes == 2:
8793
      self.op.snode = ial.result[1]
8794

    
8795
  def BuildHooksEnv(self):
8796
    """Build hooks env.
8797

8798
    This runs on master, primary and secondary nodes of the instance.
8799

8800
    """
8801
    env = {
8802
      "ADD_MODE": self.op.mode,
8803
      }
8804
    if self.op.mode == constants.INSTANCE_IMPORT:
8805
      env["SRC_NODE"] = self.op.src_node
8806
      env["SRC_PATH"] = self.op.src_path
8807
      env["SRC_IMAGES"] = self.src_images
8808

    
8809
    env.update(_BuildInstanceHookEnv(
8810
      name=self.op.instance_name,
8811
      primary_node=self.op.pnode,
8812
      secondary_nodes=self.secondaries,
8813
      status=self.op.start,
8814
      os_type=self.op.os_type,
8815
      minmem=self.be_full[constants.BE_MINMEM],
8816
      maxmem=self.be_full[constants.BE_MAXMEM],
8817
      vcpus=self.be_full[constants.BE_VCPUS],
8818
      nics=_NICListToTuple(self, self.nics),
8819
      disk_template=self.op.disk_template,
8820
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8821
             for d in self.disks],
8822
      bep=self.be_full,
8823
      hvp=self.hv_full,
8824
      hypervisor_name=self.op.hypervisor,
8825
      tags=self.op.tags,
8826
    ))
8827

    
8828
    return env
8829

    
8830
  def BuildHooksNodes(self):
8831
    """Build hooks nodes.
8832

8833
    """
8834
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8835
    return nl, nl
8836

    
8837
  def _ReadExportInfo(self):
8838
    """Reads the export information from disk.
8839

8840
    It will override the opcode source node and path with the actual
8841
    information, if these two were not specified before.
8842

8843
    @return: the export information
8844

8845
    """
8846
    assert self.op.mode == constants.INSTANCE_IMPORT
8847

    
8848
    src_node = self.op.src_node
8849
    src_path = self.op.src_path
8850

    
8851
    if src_node is None:
8852
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8853
      exp_list = self.rpc.call_export_list(locked_nodes)
8854
      found = False
8855
      for node in exp_list:
8856
        if exp_list[node].fail_msg:
8857
          continue
8858
        if src_path in exp_list[node].payload:
8859
          found = True
8860
          self.op.src_node = src_node = node
8861
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8862
                                                       src_path)
8863
          break
8864
      if not found:
8865
        raise errors.OpPrereqError("No export found for relative path %s" %
8866
                                    src_path, errors.ECODE_INVAL)
8867

    
8868
    _CheckNodeOnline(self, src_node)
8869
    result = self.rpc.call_export_info(src_node, src_path)
8870
    result.Raise("No export or invalid export found in dir %s" % src_path)
8871

    
8872
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8873
    if not export_info.has_section(constants.INISECT_EXP):
8874
      raise errors.ProgrammerError("Corrupted export config",
8875
                                   errors.ECODE_ENVIRON)
8876

    
8877
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8878
    if (int(ei_version) != constants.EXPORT_VERSION):
8879
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8880
                                 (ei_version, constants.EXPORT_VERSION),
8881
                                 errors.ECODE_ENVIRON)
8882
    return export_info
8883

    
8884
  def _ReadExportParams(self, einfo):
8885
    """Use export parameters as defaults.
8886

8887
    If the opcode does not specify (i.e. override) some instance
8888
    parameters, try to take them from the export information, if it
8889
    declares them.
8890

8891
    """
8892
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8893

    
8894
    if self.op.disk_template is None:
8895
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8896
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8897
                                          "disk_template")
8898
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8899
          raise errors.OpPrereqError("Disk template specified in configuration"
8900
                                     " file is not one of the allowed values:"
8901
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8902
      else:
8903
        raise errors.OpPrereqError("No disk template specified and the export"
8904
                                   " is missing the disk_template information",
8905
                                   errors.ECODE_INVAL)
8906

    
8907
    if not self.op.disks:
8908
      disks = []
8909
      # TODO: import the disk iv_name too
8910
      for idx in range(constants.MAX_DISKS):
8911
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8912
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8913
          disks.append({constants.IDISK_SIZE: disk_sz})
8914
      self.op.disks = disks
8915
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8916
        raise errors.OpPrereqError("No disk info specified and the export"
8917
                                   " is missing the disk information",
8918
                                   errors.ECODE_INVAL)
8919

    
8920
    if not self.op.nics:
8921
      nics = []
8922
      for idx in range(constants.MAX_NICS):
8923
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8924
          ndict = {}
8925
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8926
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8927
            ndict[name] = v
8928
          nics.append(ndict)
8929
        else:
8930
          break
8931
      self.op.nics = nics
8932

    
8933
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8934
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8935

    
8936
    if (self.op.hypervisor is None and
8937
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8938
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8939

    
8940
    if einfo.has_section(constants.INISECT_HYP):
8941
      # use the export parameters but do not override the ones
8942
      # specified by the user
8943
      for name, value in einfo.items(constants.INISECT_HYP):
8944
        if name not in self.op.hvparams:
8945
          self.op.hvparams[name] = value
8946

    
8947
    if einfo.has_section(constants.INISECT_BEP):
8948
      # use the parameters, without overriding
8949
      for name, value in einfo.items(constants.INISECT_BEP):
8950
        if name not in self.op.beparams:
8951
          self.op.beparams[name] = value
8952
        # Compatibility for the old "memory" be param
8953
        if name == constants.BE_MEMORY:
8954
          if constants.BE_MAXMEM not in self.op.beparams:
8955
            self.op.beparams[constants.BE_MAXMEM] = value
8956
          if constants.BE_MINMEM not in self.op.beparams:
8957
            self.op.beparams[constants.BE_MINMEM] = value
8958
    else:
8959
      # try to read the parameters old style, from the main section
8960
      for name in constants.BES_PARAMETERS:
8961
        if (name not in self.op.beparams and
8962
            einfo.has_option(constants.INISECT_INS, name)):
8963
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8964

    
8965
    if einfo.has_section(constants.INISECT_OSP):
8966
      # use the parameters, without overriding
8967
      for name, value in einfo.items(constants.INISECT_OSP):
8968
        if name not in self.op.osparams:
8969
          self.op.osparams[name] = value
8970

    
8971
  def _RevertToDefaults(self, cluster):
8972
    """Revert the instance parameters to the default values.
8973

8974
    """
8975
    # hvparams
8976
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8977
    for name in self.op.hvparams.keys():
8978
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8979
        del self.op.hvparams[name]
8980
    # beparams
8981
    be_defs = cluster.SimpleFillBE({})
8982
    for name in self.op.beparams.keys():
8983
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8984
        del self.op.beparams[name]
8985
    # nic params
8986
    nic_defs = cluster.SimpleFillNIC({})
8987
    for nic in self.op.nics:
8988
      for name in constants.NICS_PARAMETERS:
8989
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8990
          del nic[name]
8991
    # osparams
8992
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8993
    for name in self.op.osparams.keys():
8994
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8995
        del self.op.osparams[name]
8996

    
8997
  def _CalculateFileStorageDir(self):
8998
    """Calculate final instance file storage dir.
8999

9000
    """
9001
    # file storage dir calculation/check
9002
    self.instance_file_storage_dir = None
9003
    if self.op.disk_template in constants.DTS_FILEBASED:
9004
      # build the full file storage dir path
9005
      joinargs = []
9006

    
9007
      if self.op.disk_template == constants.DT_SHARED_FILE:
9008
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
9009
      else:
9010
        get_fsd_fn = self.cfg.GetFileStorageDir
9011

    
9012
      cfg_storagedir = get_fsd_fn()
9013
      if not cfg_storagedir:
9014
        raise errors.OpPrereqError("Cluster file storage dir not defined")
9015
      joinargs.append(cfg_storagedir)
9016

    
9017
      if self.op.file_storage_dir is not None:
9018
        joinargs.append(self.op.file_storage_dir)
9019

    
9020
      joinargs.append(self.op.instance_name)
9021

    
9022
      # pylint: disable=W0142
9023
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
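      # Illustrative result (hypothetical paths): with a cluster file storage
      # dir of "/srv/ganeti/file-storage", file_storage_dir "websrv" and
      # instance name "inst1.example.com", this yields
      # "/srv/ganeti/file-storage/websrv/inst1.example.com".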
9024

    
9025
  def CheckPrereq(self):
9026
    """Check prerequisites.
9027

9028
    """
9029
    self._CalculateFileStorageDir()
9030

    
9031
    if self.op.mode == constants.INSTANCE_IMPORT:
9032
      export_info = self._ReadExportInfo()
9033
      self._ReadExportParams(export_info)
9034

    
9035
    if (not self.cfg.GetVGName() and
9036
        self.op.disk_template not in constants.DTS_NOT_LVM):
9037
      raise errors.OpPrereqError("Cluster does not support lvm-based"
9038
                                 " instances", errors.ECODE_STATE)
9039

    
9040
    if (self.op.hypervisor is None or
9041
        self.op.hypervisor == constants.VALUE_AUTO):
9042
      self.op.hypervisor = self.cfg.GetHypervisorType()
9043

    
9044
    cluster = self.cfg.GetClusterInfo()
9045
    enabled_hvs = cluster.enabled_hypervisors
9046
    if self.op.hypervisor not in enabled_hvs:
9047
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
9048
                                 " cluster (%s)" % (self.op.hypervisor,
9049
                                  ",".join(enabled_hvs)),
9050
                                 errors.ECODE_STATE)
9051

    
9052
    # Check tag validity
9053
    for tag in self.op.tags:
9054
      objects.TaggableObject.ValidateTag(tag)
9055

    
9056
    # check hypervisor parameter syntax (locally)
9057
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
9058
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
9059
                                      self.op.hvparams)
9060
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
9061
    hv_type.CheckParameterSyntax(filled_hvp)
9062
    self.hv_full = filled_hvp
9063
    # check that we don't specify global parameters on an instance
9064
    _CheckGlobalHvParams(self.op.hvparams)
9065

    
9066
    # fill and remember the beparams dict
9067
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
9068
    for param, value in self.op.beparams.iteritems():
9069
      if value == constants.VALUE_AUTO:
9070
        self.op.beparams[param] = default_beparams[param]
9071
    objects.UpgradeBeParams(self.op.beparams)
9072
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
9073
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
9074

    
9075
    # build os parameters
9076
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
9077

    
9078
    # now that hvp/bep are in final format, let's reset to defaults,
9079
    # if told to do so
9080
    if self.op.identify_defaults:
9081
      self._RevertToDefaults(cluster)
9082

    
9083
    # NIC buildup
9084
    self.nics = []
9085
    for idx, nic in enumerate(self.op.nics):
9086
      nic_mode_req = nic.get(constants.INIC_MODE, None)
9087
      nic_mode = nic_mode_req
9088
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9089
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9090

    
9091
      # in routed mode, for the first nic, the default ip is 'auto'
9092
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9093
        default_ip_mode = constants.VALUE_AUTO
9094
      else:
9095
        default_ip_mode = constants.VALUE_NONE
9096

    
9097
      # ip validity checks
9098
      ip = nic.get(constants.INIC_IP, default_ip_mode)
9099
      if ip is None or ip.lower() == constants.VALUE_NONE:
9100
        nic_ip = None
9101
      elif ip.lower() == constants.VALUE_AUTO:
9102
        if not self.op.name_check:
9103
          raise errors.OpPrereqError("IP address set to auto but name checks"
9104
                                     " have been skipped",
9105
                                     errors.ECODE_INVAL)
9106
        nic_ip = self.hostname1.ip
9107
      else:
9108
        if not netutils.IPAddress.IsValid(ip):
9109
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9110
                                     errors.ECODE_INVAL)
9111
        nic_ip = ip
9112

    
9113
      # TODO: check the ip address for uniqueness
9114
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9115
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
9116
                                   errors.ECODE_INVAL)
9117

    
9118
      # MAC address verification
9119
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9120
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9121
        mac = utils.NormalizeAndValidateMac(mac)
9122

    
9123
        try:
9124
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9125
        except errors.ReservationError:
9126
          raise errors.OpPrereqError("MAC address %s already in use"
9127
                                     " in cluster" % mac,
9128
                                     errors.ECODE_NOTUNIQUE)
9129

    
9130
      #  Build nic parameters
9131
      link = nic.get(constants.INIC_LINK, None)
9132
      if link == constants.VALUE_AUTO:
9133
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9134
      nicparams = {}
9135
      if nic_mode_req:
9136
        nicparams[constants.NIC_MODE] = nic_mode
9137
      if link:
9138
        nicparams[constants.NIC_LINK] = link
9139

    
9140
      check_params = cluster.SimpleFillNIC(nicparams)
9141
      objects.NIC.CheckParameterSyntax(check_params)
9142
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9143

    
9144
    # disk checks/pre-build
9145
    default_vg = self.cfg.GetVGName()
9146
    self.disks = []
9147
    for disk in self.op.disks:
9148
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9149
      if mode not in constants.DISK_ACCESS_SET:
9150
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9151
                                   mode, errors.ECODE_INVAL)
9152
      size = disk.get(constants.IDISK_SIZE, None)
9153
      if size is None:
9154
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9155
      try:
9156
        size = int(size)
9157
      except (TypeError, ValueError):
9158
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9159
                                   errors.ECODE_INVAL)
9160

    
9161
      data_vg = disk.get(constants.IDISK_VG, default_vg)
9162
      new_disk = {
9163
        constants.IDISK_SIZE: size,
9164
        constants.IDISK_MODE: mode,
9165
        constants.IDISK_VG: data_vg,
9166
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9167
        }
9168
      if constants.IDISK_ADOPT in disk:
9169
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9170
      self.disks.append(new_disk)
9171

    
9172
    if self.op.mode == constants.INSTANCE_IMPORT:
9173
      disk_images = []
9174
      for idx in range(len(self.disks)):
9175
        option = "disk%d_dump" % idx
9176
        if export_info.has_option(constants.INISECT_INS, option):
9177
          # FIXME: are the old os-es, disk sizes, etc. useful?
9178
          export_name = export_info.get(constants.INISECT_INS, option)
9179
          image = utils.PathJoin(self.op.src_path, export_name)
9180
          disk_images.append(image)
9181
        else:
9182
          disk_images.append(False)
9183

    
9184
      self.src_images = disk_images
9185

    
9186
      old_name = export_info.get(constants.INISECT_INS, "name")
9187
      if self.op.instance_name == old_name:
9188
        for idx, nic in enumerate(self.nics):
9189
          if nic.mac == constants.VALUE_AUTO:
9190
            nic_mac_ini = "nic%d_mac" % idx
9191
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9192

    
9193
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9194

    
9195
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
9196
    if self.op.ip_check:
9197
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9198
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
9199
                                   (self.check_ip, self.op.instance_name),
9200
                                   errors.ECODE_NOTUNIQUE)
9201

    
9202
    #### mac address generation
9203
    # By generating here the mac address both the allocator and the hooks get
9204
    # the real final mac address rather than the 'auto' or 'generate' value.
9205
    # There is a race condition between the generation and the instance object
9206
    # creation, which means that we know the mac is valid now, but we're not
9207
    # sure it will be when we actually add the instance. If things go bad
9208
    # adding the instance will abort because of a duplicate mac, and the
9209
    # creation job will fail.
9210
    for nic in self.nics:
9211
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9212
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9213

    
9214
    #### allocator run
9215

    
9216
    if self.op.iallocator is not None:
9217
      self._RunAllocator()
9218

    
9219
    # Release all unneeded node locks
9220
    _ReleaseLocks(self, locking.LEVEL_NODE,
9221
                  keep=filter(None, [self.op.pnode, self.op.snode,
9222
                                     self.op.src_node]))
9223

    
9224
    #### node related checks
9225

    
9226
    # check primary node
9227
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9228
    assert self.pnode is not None, \
9229
      "Cannot retrieve locked node %s" % self.op.pnode
9230
    if pnode.offline:
9231
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9232
                                 pnode.name, errors.ECODE_STATE)
9233
    if pnode.drained:
9234
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9235
                                 pnode.name, errors.ECODE_STATE)
9236
    if not pnode.vm_capable:
9237
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9238
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9239

    
9240
    self.secondaries = []
9241

    
9242
    # mirror node verification
9243
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9244
      if self.op.snode == pnode.name:
9245
        raise errors.OpPrereqError("The secondary node cannot be the"
9246
                                   " primary node", errors.ECODE_INVAL)
9247
      _CheckNodeOnline(self, self.op.snode)
9248
      _CheckNodeNotDrained(self, self.op.snode)
9249
      _CheckNodeVmCapable(self, self.op.snode)
9250
      self.secondaries.append(self.op.snode)
9251

    
9252
      snode = self.cfg.GetNodeInfo(self.op.snode)
9253
      if pnode.group != snode.group:
9254
        self.LogWarning("The primary and secondary nodes are in two"
9255
                        " different node groups; the disk parameters"
9256
                        " from the first disk's node group will be"
9257
                        " used")
9258

    
9259
    nodenames = [pnode.name] + self.secondaries
9260

    
9261
    # disk parameters (not customizable at instance or node level)
9262
    # just use the primary node parameters, ignoring the secondary.
9263
    self.diskparams = self.cfg.GetNodeGroup(pnode.group).diskparams
9264

    
9265
    if not self.adopt_disks:
9266
      # Check lv size requirements, if not adopting
9267
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9268
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9269

    
9270
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9271
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9272
                                disk[constants.IDISK_ADOPT])
9273
                     for disk in self.disks])
9274
      if len(all_lvs) != len(self.disks):
9275
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9276
                                   errors.ECODE_INVAL)
9277
      for lv_name in all_lvs:
9278
        try:
9279
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9280
          # to ReserveLV uses the same syntax
9281
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9282
        except errors.ReservationError:
9283
          raise errors.OpPrereqError("LV named %s used by another instance" %
9284
                                     lv_name, errors.ECODE_NOTUNIQUE)
9285

    
9286
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9287
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9288

    
9289
      node_lvs = self.rpc.call_lv_list([pnode.name],
9290
                                       vg_names.payload.keys())[pnode.name]
9291
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9292
      node_lvs = node_lvs.payload
9293

    
9294
      delta = all_lvs.difference(node_lvs.keys())
9295
      if delta:
9296
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9297
                                   utils.CommaJoin(delta),
9298
                                   errors.ECODE_INVAL)
9299
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9300
      if online_lvs:
9301
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9302
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9303
                                   errors.ECODE_STATE)
9304
      # update the size of disk based on what is found
9305
      for dsk in self.disks:
9306
        dsk[constants.IDISK_SIZE] = \
9307
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9308
                                        dsk[constants.IDISK_ADOPT])][0]))
9309

    
9310
    elif self.op.disk_template == constants.DT_BLOCK:
9311
      # Normalize and de-duplicate device paths
9312
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9313
                       for disk in self.disks])
9314
      if len(all_disks) != len(self.disks):
9315
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9316
                                   errors.ECODE_INVAL)
9317
      baddisks = [d for d in all_disks
9318
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9319
      if baddisks:
9320
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9321
                                   " cannot be adopted" %
9322
                                   (", ".join(baddisks),
9323
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9324
                                   errors.ECODE_INVAL)
9325

    
9326
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9327
                                            list(all_disks))[pnode.name]
9328
      node_disks.Raise("Cannot get block device information from node %s" %
9329
                       pnode.name)
9330
      node_disks = node_disks.payload
9331
      delta = all_disks.difference(node_disks.keys())
9332
      if delta:
9333
        raise errors.OpPrereqError("Missing block device(s): %s" %
9334
                                   utils.CommaJoin(delta),
9335
                                   errors.ECODE_INVAL)
9336
      for dsk in self.disks:
9337
        dsk[constants.IDISK_SIZE] = \
9338
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9339

    
9340
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9341

    
9342
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9343
    # check OS parameters (remotely)
9344
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9345

    
9346
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9347

    
9348
    # memory check on primary node
9349
    #TODO(dynmem): use MINMEM for checking
9350
    if self.op.start:
9351
      _CheckNodeFreeMemory(self, self.pnode.name,
9352
                           "creating instance %s" % self.op.instance_name,
9353
                           self.be_full[constants.BE_MAXMEM],
9354
                           self.op.hypervisor)
9355

    
9356
    self.dry_run_result = list(nodenames)
9357

    
9358
  def Exec(self, feedback_fn):
9359
    """Create and add the instance to the cluster.
9360

9361
    """
9362
    instance = self.op.instance_name
9363
    pnode_name = self.pnode.name
9364

    
9365
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9366
                self.owned_locks(locking.LEVEL_NODE)), \
9367
      "Node locks differ from node resource locks"
9368

    
9369
    ht_kind = self.op.hypervisor
9370
    if ht_kind in constants.HTS_REQ_PORT:
9371
      network_port = self.cfg.AllocatePort()
9372
    else:
9373
      network_port = None
9374

    
9375
    disks = _GenerateDiskTemplate(self,
9376
                                  self.op.disk_template,
9377
                                  instance, pnode_name,
9378
                                  self.secondaries,
9379
                                  self.disks,
9380
                                  self.instance_file_storage_dir,
9381
                                  self.op.file_driver,
9382
                                  0,
9383
                                  feedback_fn,
9384
                                  self.diskparams)
9385

    
9386
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9387
                            primary_node=pnode_name,
9388
                            nics=self.nics, disks=disks,
9389
                            disk_template=self.op.disk_template,
9390
                            admin_state=constants.ADMINST_DOWN,
9391
                            network_port=network_port,
9392
                            beparams=self.op.beparams,
9393
                            hvparams=self.op.hvparams,
9394
                            hypervisor=self.op.hypervisor,
9395
                            osparams=self.op.osparams,
9396
                            )
9397

    
9398
    if self.op.tags:
9399
      for tag in self.op.tags:
9400
        iobj.AddTag(tag)
9401

    
9402
    if self.adopt_disks:
9403
      if self.op.disk_template == constants.DT_PLAIN:
9404
        # rename LVs to the newly-generated names; we need to construct
9405
        # 'fake' LV disks with the old data, plus the new unique_id
9406
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9407
        rename_to = []
9408
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9409
          rename_to.append(t_dsk.logical_id)
9410
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9411
          self.cfg.SetDiskID(t_dsk, pnode_name)
9412
        result = self.rpc.call_blockdev_rename(pnode_name,
9413
                                               zip(tmp_disks, rename_to))
9414
        result.Raise("Failed to rename adoped LVs")
9415
    else:
9416
      feedback_fn("* creating instance disks...")
9417
      try:
9418
        _CreateDisks(self, iobj)
9419
      except errors.OpExecError:
9420
        self.LogWarning("Device creation failed, reverting...")
9421
        try:
9422
          _RemoveDisks(self, iobj)
9423
        finally:
9424
          self.cfg.ReleaseDRBDMinors(instance)
9425
          raise
9426

    
9427
    feedback_fn("adding instance %s to cluster config" % instance)
9428

    
9429
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9430

    
9431
    # Declare that we don't want to remove the instance lock anymore, as we've
9432
    # added the instance to the config
9433
    del self.remove_locks[locking.LEVEL_INSTANCE]
9434

    
9435
    if self.op.mode == constants.INSTANCE_IMPORT:
9436
      # Release unused nodes
9437
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9438
    else:
9439
      # Release all nodes
9440
      _ReleaseLocks(self, locking.LEVEL_NODE)
9441

    
9442
    disk_abort = False
9443
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9444
      feedback_fn("* wiping instance disks...")
9445
      try:
9446
        _WipeDisks(self, iobj)
9447
      except errors.OpExecError, err:
9448
        logging.exception("Wiping disks failed")
9449
        self.LogWarning("Wiping instance disks failed (%s)", err)
9450
        disk_abort = True
9451

    
9452
    if disk_abort:
9453
      # Something is already wrong with the disks, don't do anything else
9454
      pass
9455
    elif self.op.wait_for_sync:
9456
      disk_abort = not _WaitForSync(self, iobj)
9457
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9458
      # make sure the disks are not degraded (still sync-ing is ok)
9459
      feedback_fn("* checking mirrors status")
9460
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9461
    else:
9462
      disk_abort = False
9463

    
9464
    if disk_abort:
9465
      _RemoveDisks(self, iobj)
9466
      self.cfg.RemoveInstance(iobj.name)
9467
      # Make sure the instance lock gets removed
9468
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9469
      raise errors.OpExecError("There are some degraded disks for"
9470
                               " this instance")
9471

    
9472
    # Release all node resource locks
9473
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9474

    
9475
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9476
      if self.op.mode == constants.INSTANCE_CREATE:
9477
        if not self.op.no_install:
9478
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9479
                        not self.op.wait_for_sync)
9480
          if pause_sync:
9481
            feedback_fn("* pausing disk sync to install instance OS")
9482
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9483
                                                              iobj.disks, True)
9484
            for idx, success in enumerate(result.payload):
9485
              if not success:
9486
                logging.warn("pause-sync of instance %s for disk %d failed",
9487
                             instance, idx)
9488

    
9489
          feedback_fn("* running the instance OS create scripts...")
9490
          # FIXME: pass debug option from opcode to backend
9491
          os_add_result = \
9492
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9493
                                          self.op.debug_level)
9494
          if pause_sync:
9495
            feedback_fn("* resuming disk sync")
9496
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9497
                                                              iobj.disks, False)
9498
            for idx, success in enumerate(result.payload):
9499
              if not success:
9500
                logging.warn("resume-sync of instance %s for disk %d failed",
9501
                             instance, idx)
9502

    
9503
          os_add_result.Raise("Could not add os for instance %s"
9504
                              " on node %s" % (instance, pnode_name))
9505

    
9506
      elif self.op.mode == constants.INSTANCE_IMPORT:
9507
        feedback_fn("* running the instance OS import scripts...")
9508

    
9509
        transfers = []
9510

    
9511
        for idx, image in enumerate(self.src_images):
9512
          if not image:
9513
            continue
9514

    
9515
          # FIXME: pass debug option from opcode to backend
9516
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9517
                                             constants.IEIO_FILE, (image, ),
9518
                                             constants.IEIO_SCRIPT,
9519
                                             (iobj.disks[idx], idx),
9520
                                             None)
9521
          transfers.append(dt)
9522

    
9523
        import_result = \
9524
          masterd.instance.TransferInstanceData(self, feedback_fn,
9525
                                                self.op.src_node, pnode_name,
9526
                                                self.pnode.secondary_ip,
9527
                                                iobj, transfers)
9528
        if not compat.all(import_result):
9529
          self.LogWarning("Some disks for instance %s on node %s were not"
9530
                          " imported successfully" % (instance, pnode_name))
9531

    
9532
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9533
        feedback_fn("* preparing remote import...")
9534
        # The source cluster will stop the instance before attempting to make a
9535
        # connection. In some cases stopping an instance can take a long time,
9536
        # hence the shutdown timeout is added to the connection timeout.
9537
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9538
                           self.op.source_shutdown_timeout)
9539
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9540

    
9541
        assert iobj.primary_node == self.pnode.name
9542
        disk_results = \
9543
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9544
                                        self.source_x509_ca,
9545
                                        self._cds, timeouts)
9546
        if not compat.all(disk_results):
9547
          # TODO: Should the instance still be started, even if some disks
9548
          # failed to import (valid for local imports, too)?
9549
          self.LogWarning("Some disks for instance %s on node %s were not"
9550
                          " imported successfully" % (instance, pnode_name))
9551

    
9552
        # Run rename script on newly imported instance
9553
        assert iobj.name == instance
9554
        feedback_fn("Running rename script for %s" % instance)
9555
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9556
                                                   self.source_instance_name,
9557
                                                   self.op.debug_level)
9558
        if result.fail_msg:
9559
          self.LogWarning("Failed to run rename script for %s on node"
9560
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9561

    
9562
      else:
9563
        # also checked in the prereq part
9564
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9565
                                     % self.op.mode)
9566

    
9567
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9568

    
9569
    if self.op.start:
9570
      iobj.admin_state = constants.ADMINST_UP
9571
      self.cfg.Update(iobj, feedback_fn)
9572
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9573
      feedback_fn("* starting instance...")
9574
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9575
                                            False)
9576
      result.Raise("Could not start instance")
9577

    
9578
    return list(iobj.all_nodes)
9579

    
9580

    
9581
class LUInstanceConsole(NoHooksLU):
9582
  """Connect to an instance's console.
9583

9584
  This is somewhat special in that it returns the command line that
9585
  you need to run on the master node in order to connect to the
9586
  console.
9587

9588
  """
9589
  REQ_BGL = False
9590

    
9591
  def ExpandNames(self):
9592
    self.share_locks = _ShareAll()
9593
    self._ExpandAndLockInstance()
9594

    
9595
  def CheckPrereq(self):
9596
    """Check prerequisites.
9597

9598
    This checks that the instance is in the cluster.
9599

9600
    """
9601
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9602
    assert self.instance is not None, \
9603
      "Cannot retrieve locked instance %s" % self.op.instance_name
9604
    _CheckNodeOnline(self, self.instance.primary_node)
9605

    
9606
  def Exec(self, feedback_fn):
9607
    """Connect to the console of an instance
9608

9609
    """
9610
    instance = self.instance
9611
    node = instance.primary_node
9612

    
9613
    node_insts = self.rpc.call_instance_list([node],
9614
                                             [instance.hypervisor])[node]
9615
    node_insts.Raise("Can't get node information from %s" % node)
9616

    
9617
    if instance.name not in node_insts.payload:
9618
      if instance.admin_state == constants.ADMINST_UP:
9619
        state = constants.INSTST_ERRORDOWN
9620
      elif instance.admin_state == constants.ADMINST_DOWN:
9621
        state = constants.INSTST_ADMINDOWN
9622
      else:
9623
        state = constants.INSTST_ADMINOFFLINE
9624
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9625
                               (instance.name, state))
9626

    
9627
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9628

    
9629
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
9654
  """Replace the disks of an instance.
9655

9656
  """
9657
  HPATH = "mirrors-replace"
9658
  HTYPE = constants.HTYPE_INSTANCE
9659
  REQ_BGL = False
9660

    
9661
  def CheckArguments(self):
9662
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9663
                                  self.op.iallocator)
9664

    
9665
  def ExpandNames(self):
9666
    self._ExpandAndLockInstance()
9667

    
9668
    assert locking.LEVEL_NODE not in self.needed_locks
9669
    assert locking.LEVEL_NODE_RES not in self.needed_locks
9670
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9671

    
9672
    assert self.op.iallocator is None or self.op.remote_node is None, \
9673
      "Conflicting options"
9674

    
9675
    if self.op.remote_node is not None:
9676
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9677

    
9678
      # Warning: do not remove the locking of the new secondary here
9679
      # unless DRBD8.AddChildren is changed to work in parallel;
9680
      # currently it doesn't since parallel invocations of
9681
      # FindUnusedMinor will conflict
9682
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9683
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9684
    else:
9685
      self.needed_locks[locking.LEVEL_NODE] = []
9686
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9687

    
9688
      if self.op.iallocator is not None:
9689
        # iallocator will select a new node in the same group
9690
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9691

    
9692
    self.needed_locks[locking.LEVEL_NODE_RES] = []
9693

    
9694
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9695
                                   self.op.iallocator, self.op.remote_node,
9696
                                   self.op.disks, False, self.op.early_release)
9697

    
9698
    self.tasklets = [self.replacer]
9699

    
9700
  def DeclareLocks(self, level):
9701
    if level == locking.LEVEL_NODEGROUP:
9702
      assert self.op.remote_node is None
9703
      assert self.op.iallocator is not None
9704
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9705

    
9706
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9707
      # Lock all groups used by instance optimistically; this requires going
9708
      # via the node before it's locked, requiring verification later on
9709
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9710
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9711

    
9712
    elif level == locking.LEVEL_NODE:
9713
      if self.op.iallocator is not None:
9714
        assert self.op.remote_node is None
9715
        assert not self.needed_locks[locking.LEVEL_NODE]
9716

    
9717
        # Lock member nodes of all locked groups
9718
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9719
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9720
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9721
      else:
9722
        self._LockInstancesNodes()
9723
    elif level == locking.LEVEL_NODE_RES:
9724
      # Reuse node locks
9725
      self.needed_locks[locking.LEVEL_NODE_RES] = \
9726
        self.needed_locks[locking.LEVEL_NODE]
9727

    
9728
  def BuildHooksEnv(self):
9729
    """Build hooks env.
9730

9731
    This runs on the master, the primary and all the secondaries.
9732

9733
    """
9734
    instance = self.replacer.instance
9735
    env = {
9736
      "MODE": self.op.mode,
9737
      "NEW_SECONDARY": self.op.remote_node,
9738
      "OLD_SECONDARY": instance.secondary_nodes[0],
9739
      }
9740
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9741
    return env
9742

    
9743
  def BuildHooksNodes(self):
9744
    """Build hooks nodes.
9745

9746
    """
9747
    instance = self.replacer.instance
9748
    nl = [
9749
      self.cfg.GetMasterNode(),
9750
      instance.primary_node,
9751
      ]
9752
    if self.op.remote_node is not None:
9753
      nl.append(self.op.remote_node)
9754
    return nl, nl
9755

    
9756
  def CheckPrereq(self):
9757
    """Check prerequisites.
9758

9759
    """
9760
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9761
            self.op.iallocator is None)
9762

    
9763
    # Verify if node group locks are still correct
9764
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9765
    if owned_groups:
9766
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9767

    
9768
    return LogicalUnit.CheckPrereq(self)
9769

    
9770

    
9771
class TLReplaceDisks(Tasklet):
9772
  """Replaces disks for an instance.
9773

9774
  Note: Locking is not within the scope of this class.
9775

9776
  """
9777
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9778
               disks, delay_iallocator, early_release):
9779
    """Initializes this class.
9780

9781
    """
9782
    Tasklet.__init__(self, lu)
9783

    
9784
    # Parameters
9785
    self.instance_name = instance_name
9786
    self.mode = mode
9787
    self.iallocator_name = iallocator_name
9788
    self.remote_node = remote_node
9789
    self.disks = disks
9790
    self.delay_iallocator = delay_iallocator
9791
    self.early_release = early_release
9792

    
9793
    # Runtime data
9794
    self.instance = None
9795
    self.new_node = None
9796
    self.target_node = None
9797
    self.other_node = None
9798
    self.remote_node_info = None
9799
    self.node_secondary_ip = None
9800

    
9801
  @staticmethod
9802
  def CheckArguments(mode, remote_node, iallocator):
9803
    """Helper function for users of this class.
9804

9805
    """
9806
    # check for valid parameter combination
9807
    if mode == constants.REPLACE_DISK_CHG:
9808
      if remote_node is None and iallocator is None:
9809
        raise errors.OpPrereqError("When changing the secondary either an"
9810
                                   " iallocator script must be used or the"
9811
                                   " new node given", errors.ECODE_INVAL)
9812

    
9813
      if remote_node is not None and iallocator is not None:
9814
        raise errors.OpPrereqError("Give either the iallocator or the new"
9815
                                   " secondary, not both", errors.ECODE_INVAL)
9816

    
9817
    elif remote_node is not None or iallocator is not None:
9818
      # Not replacing the secondary
9819
      raise errors.OpPrereqError("The iallocator and new node options can"
9820
                                 " only be used when changing the"
9821
                                 " secondary node", errors.ECODE_INVAL)
9822

    
9823
  @staticmethod
9824
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9825
    """Compute a new secondary node using an IAllocator.
9826

9827
    """
9828
    ial = IAllocator(lu.cfg, lu.rpc,
9829
                     mode=constants.IALLOCATOR_MODE_RELOC,
9830
                     name=instance_name,
9831
                     relocate_from=list(relocate_from))
9832

    
9833
    ial.Run(iallocator_name)
9834

    
9835
    if not ial.success:
9836
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9837
                                 " %s" % (iallocator_name, ial.info),
9838
                                 errors.ECODE_NORES)
9839

    
9840
    if len(ial.result) != ial.required_nodes:
9841
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9842
                                 " of nodes (%s), required %s" %
9843
                                 (iallocator_name,
9844
                                  len(ial.result), ial.required_nodes),
9845
                                 errors.ECODE_FAULT)
9846

    
9847
    remote_node_name = ial.result[0]
9848

    
9849
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9850
               instance_name, remote_node_name)
9851

    
9852
    return remote_node_name
9853

    
9854
  def _FindFaultyDisks(self, node_name):
9855
    """Wrapper for L{_FindFaultyInstanceDisks}.
9856

9857
    """
9858
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9859
                                    node_name, True)
9860

    
9861
  def _CheckDisksActivated(self, instance):
9862
    """Checks if the instance disks are activated.
9863

9864
    @param instance: The instance to check disks
9865
    @return: True if they are activated, False otherwise
9866

9867
    """
9868
    nodes = instance.all_nodes
9869

    
9870
    for idx, dev in enumerate(instance.disks):
9871
      for node in nodes:
9872
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9873
        self.cfg.SetDiskID(dev, node)
9874

    
9875
        result = self.rpc.call_blockdev_find(node, dev)
9876

    
9877
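        # a node marked offline cannot report its disks; skip it instead of
        # treating the missing answer as an inactive disk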
        if result.offline:
9878
          continue
9879
        elif result.fail_msg or not result.payload:
9880
          return False
9881

    
9882
    return True
9883

    
9884
  def CheckPrereq(self):
9885
    """Check prerequisites.
9886

9887
    This checks that the instance is in the cluster.
9888

9889
    """
9890
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9891
    assert instance is not None, \
9892
      "Cannot retrieve locked instance %s" % self.instance_name
9893

    
9894
    if instance.disk_template != constants.DT_DRBD8:
9895
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9896
                                 " instances", errors.ECODE_INVAL)
9897

    
9898
    if len(instance.secondary_nodes) != 1:
9899
      raise errors.OpPrereqError("The instance has a strange layout,"
9900
                                 " expected one secondary but found %d" %
9901
                                 len(instance.secondary_nodes),
9902
                                 errors.ECODE_FAULT)
9903

    
9904
    if not self.delay_iallocator:
9905
      self._CheckPrereq2()
9906

    
9907
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because, during node evacuation, the iallocator
    was only called with an unmodified cluster model, not taking planned
    changes into account.

    """
9916
    instance = self.instance
9917
    secondary_node = instance.secondary_nodes[0]
9918

    
9919
    if self.iallocator_name is None:
9920
      remote_node = self.remote_node
9921
    else:
9922
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9923
                                       instance.name, instance.secondary_nodes)
9924

    
9925
    if remote_node is None:
9926
      self.remote_node_info = None
9927
    else:
9928
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9929
             "Remote node '%s' is not locked" % remote_node
9930

    
9931
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9932
      assert self.remote_node_info is not None, \
9933
        "Cannot retrieve locked node %s" % remote_node
9934

    
9935
    if remote_node == self.instance.primary_node:
9936
      raise errors.OpPrereqError("The specified node is the primary node of"
9937
                                 " the instance", errors.ECODE_INVAL)
9938

    
9939
    if remote_node == secondary_node:
9940
      raise errors.OpPrereqError("The specified node is already the"
9941
                                 " secondary node of the instance",
9942
                                 errors.ECODE_INVAL)
9943

    
9944
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9945
                                    constants.REPLACE_DISK_CHG):
9946
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9947
                                 errors.ECODE_INVAL)
9948

    
9949
    if self.mode == constants.REPLACE_DISK_AUTO:
9950
      if not self._CheckDisksActivated(instance):
9951
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9952
                                   " first" % self.instance_name,
9953
                                   errors.ECODE_STATE)
9954
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9955
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9956

    
9957
      if faulty_primary and faulty_secondary:
9958
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9959
                                   " one node and can not be repaired"
9960
                                   " automatically" % self.instance_name,
9961
                                   errors.ECODE_STATE)
9962

    
9963
      if faulty_primary:
9964
        self.disks = faulty_primary
9965
        self.target_node = instance.primary_node
9966
        self.other_node = secondary_node
9967
        check_nodes = [self.target_node, self.other_node]
9968
      elif faulty_secondary:
9969
        self.disks = faulty_secondary
9970
        self.target_node = secondary_node
9971
        self.other_node = instance.primary_node
9972
        check_nodes = [self.target_node, self.other_node]
9973
      else:
9974
        self.disks = []
9975
        check_nodes = []
9976

    
9977
    else:
9978
      # Non-automatic modes
9979
      if self.mode == constants.REPLACE_DISK_PRI:
9980
        self.target_node = instance.primary_node
9981
        self.other_node = secondary_node
9982
        check_nodes = [self.target_node, self.other_node]
9983

    
9984
      elif self.mode == constants.REPLACE_DISK_SEC:
9985
        self.target_node = secondary_node
9986
        self.other_node = instance.primary_node
9987
        check_nodes = [self.target_node, self.other_node]
9988

    
9989
      elif self.mode == constants.REPLACE_DISK_CHG:
9990
        self.new_node = remote_node
9991
        self.other_node = instance.primary_node
9992
        self.target_node = secondary_node
9993
        check_nodes = [self.new_node, self.other_node]
9994

    
9995
        _CheckNodeNotDrained(self.lu, remote_node)
9996
        _CheckNodeVmCapable(self.lu, remote_node)
9997

    
9998
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9999
        assert old_node_info is not None
10000
        if old_node_info.offline and not self.early_release:
10001
          # doesn't make sense to delay the release
10002
          self.early_release = True
10003
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
10004
                          " early-release mode", secondary_node)
10005

    
10006
      else:
10007
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
10008
                                     self.mode)
10009

    
10010
      # If not specified all disks should be replaced
10011
      if not self.disks:
10012
        self.disks = range(len(self.instance.disks))
10013

    
10014
    # TODO: compute disk parameters
10015
    primary_node_info = self.cfg.GetNodeInfo(instance.primary_node)
10016
    secondary_node_info = self.cfg.GetNodeInfo(secondary_node)
10017
    if primary_node_info.group != secondary_node_info.group:
10018
      self.lu.LogInfo("The instance primary and secondary nodes are in two"
10019
                      " different node groups; the disk parameters of the"
10020
                      " primary node's group will be applied.")
10021

    
10022
    self.diskparams = self.cfg.GetNodeGroup(primary_node_info.group).diskparams
10023

    
10024
    for node in check_nodes:
10025
      _CheckNodeOnline(self.lu, node)
10026

    
10027
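    # Nodes actually touched by this operation; locks on any other node are
    # released below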
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
10028
                                                          self.other_node,
10029
                                                          self.target_node]
10030
                              if node_name is not None)
10031

    
10032
    # Release unneeded node and node resource locks
10033
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
10034
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
10035

    
10036
    # Release any owned node group
10037
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
10038
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
10039

    
10040
    # Check whether disks are valid
10041
    for disk_idx in self.disks:
10042
      instance.FindDisk(disk_idx)
10043

    
10044
    # Get secondary node IP addresses
10045
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
10046
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
10047

    
10048
  def Exec(self, feedback_fn):
10049
    """Execute disk replacement.
10050

10051
    This dispatches the disk replacement to the appropriate handler.
10052

10053
    """
10054
    if self.delay_iallocator:
10055
      self._CheckPrereq2()
10056

    
10057
    if __debug__:
10058
      # Verify owned locks before starting operation
10059
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
10060
      assert set(owned_nodes) == set(self.node_secondary_ip), \
10061
          ("Incorrect node locks, owning %s, expected %s" %
10062
           (owned_nodes, self.node_secondary_ip.keys()))
10063
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
10064
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
10065

    
10066
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
10067
      assert list(owned_instances) == [self.instance_name], \
10068
          "Instance '%s' not locked" % self.instance_name
10069

    
10070
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
10071
          "Should not own any node group lock at this point"
10072

    
10073
    if not self.disks:
10074
      feedback_fn("No disks need replacement")
10075
      return
10076

    
10077
    feedback_fn("Replacing disk(s) %s for %s" %
10078
                (utils.CommaJoin(self.disks), self.instance.name))
10079

    
10080
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
10081

    
10082
    # Activate the instance disks if we're replacing them on a down instance
10083
    if activate_disks:
10084
      _StartInstanceDisks(self.lu, self.instance, True)
10085

    
10086
    try:
10087
      # Should we replace the secondary node?
10088
      if self.new_node is not None:
10089
        fn = self._ExecDrbd8Secondary
10090
      else:
10091
        fn = self._ExecDrbd8DiskOnly
10092

    
10093
      result = fn(feedback_fn)
10094
    finally:
10095
      # Deactivate the instance disks if we're replacing them on a
10096
      # down instance
10097
      if activate_disks:
10098
        _SafeShutdownInstanceDisks(self.lu, self.instance)
10099

    
10100
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
10101

    
10102
    if __debug__:
10103
      # Verify owned locks
10104
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
10105
      nodes = frozenset(self.node_secondary_ip)
10106
      assert ((self.early_release and not owned_nodes) or
10107
              (not self.early_release and not (set(owned_nodes) - nodes))), \
10108
        ("Not owning the correct locks, early_release=%s, owned=%r,"
10109
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
10110

    
10111
    return result
10112

    
10113
  def _CheckVolumeGroup(self, nodes):
10114
    self.lu.LogInfo("Checking volume groups")
10115

    
10116
    vgname = self.cfg.GetVGName()
10117

    
10118
    # Make sure volume group exists on all involved nodes
10119
    results = self.rpc.call_vg_list(nodes)
10120
    if not results:
10121
      raise errors.OpExecError("Can't list volume groups on the nodes")
10122

    
10123
    for node in nodes:
10124
      res = results[node]
10125
      res.Raise("Error checking node %s" % node)
10126
      if vgname not in res.payload:
10127
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
10128
                                 (vgname, node))
10129

    
10130
  def _CheckDisksExistence(self, nodes):
10131
    # Check disk existence
10132
    for idx, dev in enumerate(self.instance.disks):
10133
      if idx not in self.disks:
10134
        continue
10135

    
10136
      for node in nodes:
10137
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10138
        self.cfg.SetDiskID(dev, node)
10139

    
10140
        result = self.rpc.call_blockdev_find(node, dev)
10141

    
10142
        msg = result.fail_msg
10143
        if msg or not result.payload:
10144
          if not msg:
10145
            msg = "disk not found"
10146
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10147
                                   (idx, node, msg))
10148

    
10149
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10150
    for idx, dev in enumerate(self.instance.disks):
10151
      if idx not in self.disks:
10152
        continue
10153

    
10154
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10155
                      (idx, node_name))
10156

    
10157
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10158
                                   ldisk=ldisk):
10159
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10160
                                 " replace disks for instance %s" %
10161
                                 (node_name, self.instance.name))
10162

    
10163
  def _CreateNewStorage(self, node_name):
10164
    """Create new storage on the primary or secondary node.
10165

10166
    This is only used for same-node replaces, not for changing the
10167
    secondary node, hence we don't want to modify the existing disk.
10168

10169
    """
10170
    iv_names = {}
10171

    
10172
    for idx, dev in enumerate(self.instance.disks):
10173
      if idx not in self.disks:
10174
        continue
10175

    
10176
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10177

    
10178
      self.cfg.SetDiskID(dev, node_name)
10179

    
10180
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10181
      names = _GenerateUniqueNames(self.lu, lv_names)
10182

    
10183
      _, data_p, meta_p = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10184

    
10185
      vg_data = dev.children[0].logical_id[0]
10186
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10187
                             logical_id=(vg_data, names[0]), params=data_p)
10188
      vg_meta = dev.children[1].logical_id[0]
10189
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10190
                             logical_id=(vg_meta, names[1]), params=meta_p)
10191

    
10192
      new_lvs = [lv_data, lv_meta]
10193
      old_lvs = [child.Copy() for child in dev.children]
10194
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10195

    
10196
      # we pass force_create=True to force the LVM creation
10197
      for new_lv in new_lvs:
10198
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10199
                        _GetInstanceInfoText(self.instance), False)
10200

    
10201
    return iv_names
10202

    
10203
  def _CheckDevices(self, node_name, iv_names):
10204
    for name, (dev, _, _) in iv_names.iteritems():
10205
      self.cfg.SetDiskID(dev, node_name)
10206

    
10207
      result = self.rpc.call_blockdev_find(node_name, dev)
10208

    
10209
      msg = result.fail_msg
10210
      if msg or not result.payload:
10211
        if not msg:
10212
          msg = "disk not found"
10213
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10214
                                 (name, msg))
10215

    
10216
      if result.payload.is_degraded:
10217
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10218

    
10219
  def _RemoveOldStorage(self, node_name, iv_names):
10220
    for name, (_, old_lvs, _) in iv_names.iteritems():
10221
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10222

    
10223
      for lv in old_lvs:
10224
        self.cfg.SetDiskID(lv, node_name)
10225

    
10226
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10227
        if msg:
10228
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10229
                             hint="remove unused LVs manually")
10230

    
10231
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
10253
    steps_total = 6
10254

    
10255
    # Step: check device activation
10256
    self.lu.LogStep(1, steps_total, "Check device existence")
10257
    self._CheckDisksExistence([self.other_node, self.target_node])
10258
    self._CheckVolumeGroup([self.target_node, self.other_node])
10259

    
10260
    # Step: check other node consistency
10261
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10262
    self._CheckDisksConsistency(self.other_node,
10263
                                self.other_node == self.instance.primary_node,
10264
                                False)
10265

    
10266
    # Step: create new storage
10267
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10268
    iv_names = self._CreateNewStorage(self.target_node)
10269

    
10270
    # Step: for each lv, detach+rename*2+attach
10271
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10272
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10273
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10274

    
10275
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10276
                                                     old_lvs)
10277
      result.Raise("Can't detach drbd from local storage on node"
10278
                   " %s for device %s" % (self.target_node, dev.iv_name))
10279
      #dev.children = []
10280
      #cfg.Update(instance)
10281

    
10282
      # ok, we created the new LVs, so now we know we have the needed
10283
      # storage; as such, we proceed on the target node to rename
10284
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10285
      # using the assumption that logical_id == physical_id (which in
10286
      # turn is the unique_id on that node)
10287

    
10288
      # FIXME(iustin): use a better name for the replaced LVs
10289
      temp_suffix = int(time.time())
10290
      ren_fn = lambda d, suff: (d.physical_id[0],
10291
                                d.physical_id[1] + "_replaced-%s" % suff)
10292

    
10293
      # Build the rename list based on what LVs exist on the node
10294
      rename_old_to_new = []
10295
      for to_ren in old_lvs:
10296
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10297
        if not result.fail_msg and result.payload:
10298
          # device exists
10299
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10300

    
10301
      self.lu.LogInfo("Renaming the old LVs on the target node")
10302
      result = self.rpc.call_blockdev_rename(self.target_node,
10303
                                             rename_old_to_new)
10304
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10305

    
10306
      # Now we rename the new LVs to the old LVs
10307
      self.lu.LogInfo("Renaming the new LVs on the target node")
10308
      rename_new_to_old = [(new, old.physical_id)
10309
                           for old, new in zip(old_lvs, new_lvs)]
10310
      result = self.rpc.call_blockdev_rename(self.target_node,
10311
                                             rename_new_to_old)
10312
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10313

    
10314
      # Intermediate steps of in memory modifications
10315
      for old, new in zip(old_lvs, new_lvs):
10316
        new.logical_id = old.logical_id
10317
        self.cfg.SetDiskID(new, self.target_node)
10318

    
10319
      # We need to modify old_lvs so that removal later removes the
10320
      # right LVs, not the newly added ones; note that old_lvs is a
10321
      # copy here
10322
      for disk in old_lvs:
10323
        disk.logical_id = ren_fn(disk, temp_suffix)
10324
        self.cfg.SetDiskID(disk, self.target_node)
10325

    
10326
      # Now that the new lvs have the old name, we can add them to the device
10327
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10328
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10329
                                                  new_lvs)
10330
      msg = result.fail_msg
10331
      if msg:
10332
        for new_lv in new_lvs:
10333
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10334
                                               new_lv).fail_msg
10335
          if msg2:
10336
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
10339
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10340

    
10341
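    # The remaining steps (removing old storage and waiting for sync) are
    # numbered from this counter, as their order depends on early_release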
    cstep = itertools.count(5)
10342

    
10343
    if self.early_release:
10344
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10345
      self._RemoveOldStorage(self.target_node, iv_names)
10346
      # TODO: Check if releasing locks early still makes sense
10347
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10348
    else:
10349
      # Release all resource locks except those used by the instance
10350
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10351
                    keep=self.node_secondary_ip.keys())
10352

    
10353
    # Release all node locks while waiting for sync
10354
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10355

    
10356
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10357
    # shutdown in the caller into consideration.
10358

    
10359
    # Wait for sync
10360
    # This can fail as the old devices are degraded and _WaitForSync
10361
    # does a combined result over all disks, so we don't check its return value
10362
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10363
    _WaitForSync(self.lu, self.instance)
10364

    
10365
    # Check all devices manually
10366
    self._CheckDevices(self.instance.primary_node, iv_names)
10367

    
10368
    # Step: remove old storage
10369
    if not self.early_release:
10370
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10371
      self._RemoveOldStorage(self.target_node, iv_names)
10372

    
10373
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
10392
    steps_total = 6
10393

    
10394
    pnode = self.instance.primary_node
10395

    
10396
    # Step: check device activation
10397
    self.lu.LogStep(1, steps_total, "Check device existence")
10398
    self._CheckDisksExistence([self.instance.primary_node])
10399
    self._CheckVolumeGroup([self.instance.primary_node])
10400

    
10401
    # Step: check other node consistency
10402
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10403
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10404

    
10405
    # Step: create new storage
10406
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10407
    for idx, dev in enumerate(self.instance.disks):
10408
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10409
                      (self.new_node, idx))
10410
      # we pass force_create=True to force LVM creation
10411
      for new_lv in dev.children:
10412
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10413
                        _GetInstanceInfoText(self.instance), False)
10414

    
10415
    # Step 4: drbd minors and drbd setup changes
10416
    # after this, we must manually remove the drbd minors on both the
10417
    # error and the success paths
10418
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10419
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10420
                                         for dev in self.instance.disks],
10421
                                        self.instance.name)
10422
    logging.debug("Allocated minors %r", minors)
10423

    
10424
    iv_names = {}
10425
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10426
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10427
                      (self.new_node, idx))
10428
      # create new devices on new_node; note that we create two IDs:
10429
      # one without port, so the drbd will be activated without
10430
      # networking information on the new node at this stage, and one
10431
      # with network, for the latter activation in step 4
10432
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10433
      if self.instance.primary_node == o_node1:
10434
        p_minor = o_minor1
10435
      else:
10436
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10437
        p_minor = o_minor2
10438

    
10439
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10440
                      p_minor, new_minor, o_secret)
10441
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10442
                    p_minor, new_minor, o_secret)
10443

    
10444
      iv_names[idx] = (dev, dev.children, new_net_id)
10445
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10446
                    new_net_id)
10447
      drbd_params, _, _ = _ComputeLDParams(constants.DT_DRBD8, self.diskparams)
10448
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10449
                              logical_id=new_alone_id,
10450
                              children=dev.children,
10451
                              size=dev.size,
10452
                              params=drbd_params)
10453
      try:
10454
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10455
                              _GetInstanceInfoText(self.instance), False)
10456
      except errors.GenericError:
10457
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10458
        raise
10459

    
10460
    # We have new devices, shutdown the drbd on the old secondary
10461
    for idx, dev in enumerate(self.instance.disks):
10462
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10463
      self.cfg.SetDiskID(dev, self.target_node)
10464
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10465
      if msg:
10466
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
10468
                           hint=("Please cleanup this device manually as"
10469
                                 " soon as possible"))
10470

    
10471
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10472
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10473
                                               self.instance.disks)[pnode]
10474

    
10475
    msg = result.fail_msg
10476
    if msg:
10477
      # detaches didn't succeed (unlikely)
10478
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10479
      raise errors.OpExecError("Can't detach the disks from the network on"
10480
                               " old node: %s" % (msg,))
10481

    
10482
    # if we managed to detach at least one, we update all the disks of
10483
    # the instance to point to the new secondary
10484
    self.lu.LogInfo("Updating instance configuration")
10485
    for dev, _, new_logical_id in iv_names.itervalues():
10486
      dev.logical_id = new_logical_id
10487
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10488

    
10489
    self.cfg.Update(self.instance, feedback_fn)
10490

    
10491
    # Release all node locks (the configuration has been updated)
10492
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10493

    
10494
    # and now perform the drbd attach
10495
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10496
                    " (standalone => connected)")
10497
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10498
                                            self.new_node],
10499
                                           self.node_secondary_ip,
10500
                                           self.instance.disks,
10501
                                           self.instance.name,
10502
                                           False)
10503
    for to_node, to_result in result.items():
10504
      msg = to_result.fail_msg
10505
      if msg:
10506
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10507
                           to_node, msg,
10508
                           hint=("please do a gnt-instance info to see the"
10509
                                 " status of disks"))
10510

    
10511
    cstep = itertools.count(5)
10512

    
10513
    if self.early_release:
10514
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10515
      self._RemoveOldStorage(self.target_node, iv_names)
10516
      # TODO: Check if releasing locks early still makes sense
10517
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10518
    else:
10519
      # Release all resource locks except those used by the instance
10520
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10521
                    keep=self.node_secondary_ip.keys())
10522

    
10523
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10524
    # shutdown in the caller into consideration.
10525

    
10526
    # Wait for sync
10527
    # This can fail as the old devices are degraded and _WaitForSync
10528
    # does a combined result over all disks, so we don't check its return value
10529
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10530
    _WaitForSync(self.lu, self.instance)
10531

    
10532
    # Check all devices manually
10533
    self._CheckDevices(self.instance.primary_node, iv_names)
10534

    
10535
    # Step: remove old storage
10536
    if not self.early_release:
10537
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10538
      self._RemoveOldStorage(self.target_node, iv_names)
10539

    
10540

    
10541
class LURepairNodeStorage(NoHooksLU):
10542
  """Repairs the volume group on a node.
10543

10544
  """
10545
  REQ_BGL = False
10546

    
10547
  def CheckArguments(self):
10548
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10549

    
10550
    storage_type = self.op.storage_type
10551

    
10552
    if (constants.SO_FIX_CONSISTENCY not in
10553
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10554
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10555
                                 " repaired" % storage_type,
10556
                                 errors.ECODE_INVAL)
10557

    
10558
  def ExpandNames(self):
10559
    self.needed_locks = {
10560
      locking.LEVEL_NODE: [self.op.node_name],
10561
      }
10562

    
10563
  def _CheckFaultyDisks(self, instance, node_name):
10564
    """Ensure faulty disks abort the opcode or at least warn."""
10565
    try:
10566
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10567
                                  node_name, True):
10568
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10569
                                   " node '%s'" % (instance.name, node_name),
10570
                                   errors.ECODE_STATE)
10571
    except errors.OpPrereqError, err:
10572
      if self.op.ignore_consistency:
10573
        self.proc.LogWarning(str(err.args[0]))
10574
      else:
10575
        raise
10576

    
10577
  def CheckPrereq(self):
10578
    """Check prerequisites.
10579

10580
    """
10581
    # Check whether any instance on this node has faulty disks
10582
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10583
      if inst.admin_state != constants.ADMINST_UP:
10584
        continue
10585
      check_nodes = set(inst.all_nodes)
10586
      check_nodes.discard(self.op.node_name)
10587
      for inst_node_name in check_nodes:
10588
        self._CheckFaultyDisks(inst, inst_node_name)
10589

    
10590
  def Exec(self, feedback_fn):
10591
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10592
                (self.op.name, self.op.node_name))
10593

    
10594
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10595
    result = self.rpc.call_storage_execute(self.op.node_name,
10596
                                           self.op.storage_type, st_args,
10597
                                           self.op.name,
10598
                                           constants.SO_FIX_CONSISTENCY)
10599
    result.Raise("Failed to repair storage unit '%s' on %s" %
10600
                 (self.op.name, self.op.node_name))
10601

    
10602

    
10603
class LUNodeEvacuate(NoHooksLU):
10604
  """Evacuates instances off a list of nodes.
10605

10606
  """
10607
  REQ_BGL = False
10608

    
10609
  _MODE2IALLOCATOR = {
10610
    constants.NODE_EVAC_PRI: constants.IALLOCATOR_NEVAC_PRI,
10611
    constants.NODE_EVAC_SEC: constants.IALLOCATOR_NEVAC_SEC,
10612
    constants.NODE_EVAC_ALL: constants.IALLOCATOR_NEVAC_ALL,
10613
    }
10614
  assert frozenset(_MODE2IALLOCATOR.keys()) == constants.NODE_EVAC_MODES
10615
  assert (frozenset(_MODE2IALLOCATOR.values()) ==
10616
          constants.IALLOCATOR_NEVAC_MODES)
10617

    
10618
  def CheckArguments(self):
10619
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10620

    
10621
  def ExpandNames(self):
10622
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10623

    
10624
    if self.op.remote_node is not None:
10625
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10626
      assert self.op.remote_node
10627

    
10628
      if self.op.remote_node == self.op.node_name:
10629
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10630
                                   " secondary node", errors.ECODE_INVAL)
10631

    
10632
      if self.op.mode != constants.NODE_EVAC_SEC:
10633
        raise errors.OpPrereqError("Without the use of an iallocator only"
10634
                                   " secondary instances can be evacuated",
10635
                                   errors.ECODE_INVAL)
10636

    
10637
    # Declare locks
10638
    self.share_locks = _ShareAll()
10639
    self.needed_locks = {
10640
      locking.LEVEL_INSTANCE: [],
10641
      locking.LEVEL_NODEGROUP: [],
10642
      locking.LEVEL_NODE: [],
10643
      }
10644

    
10645
    # Determine nodes (via group) optimistically, needs verification once locks
10646
    # have been acquired
10647
    self.lock_nodes = self._DetermineNodes()
10648

    
10649
  def _DetermineNodes(self):
10650
    """Gets the list of nodes to operate on.
10651

10652
    """
10653
    if self.op.remote_node is None:
10654
      # Iallocator will choose any node(s) in the same group
10655
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10656
    else:
10657
      group_nodes = frozenset([self.op.remote_node])
10658

    
10659
    # Determine nodes to be locked
10660
    return set([self.op.node_name]) | group_nodes
10661

    
10662
  def _DetermineInstances(self):
10663
    """Builds list of instances to operate on.
10664

10665
    """
10666
    assert self.op.mode in constants.NODE_EVAC_MODES
10667

    
10668
    if self.op.mode == constants.NODE_EVAC_PRI:
10669
      # Primary instances only
10670
      inst_fn = _GetNodePrimaryInstances
10671
      assert self.op.remote_node is None, \
10672
        "Evacuating primary instances requires iallocator"
10673
    elif self.op.mode == constants.NODE_EVAC_SEC:
10674
      # Secondary instances only
10675
      inst_fn = _GetNodeSecondaryInstances
10676
    else:
10677
      # All instances
10678
      assert self.op.mode == constants.NODE_EVAC_ALL
10679
      inst_fn = _GetNodeInstances
10680
      # TODO: In 2.6, change the iallocator interface to take an evacuation mode
10681
      # per instance
10682
      raise errors.OpPrereqError("Due to an issue with the iallocator"
10683
                                 " interface it is not possible to evacuate"
10684
                                 " all instances at once; specify explicitly"
10685
                                 " whether to evacuate primary or secondary"
10686
                                 " instances",
10687
                                 errors.ECODE_INVAL)
10688

    
10689
    return inst_fn(self.cfg, self.op.node_name)
10690

    
10691
  def DeclareLocks(self, level):
10692
    if level == locking.LEVEL_INSTANCE:
10693
      # Lock instances optimistically, needs verification once node and group
10694
      # locks have been acquired
10695
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10696
        set(i.name for i in self._DetermineInstances())
10697

    
10698
    elif level == locking.LEVEL_NODEGROUP:
10699
      # Lock node groups for all potential target nodes optimistically, needs
10700
      # verification once nodes have been acquired
10701
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10702
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10703

    
10704
    elif level == locking.LEVEL_NODE:
10705
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10706

    
10707
  def CheckPrereq(self):
10708
    # Verify locks
10709
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10710
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10711
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10712

    
10713
    need_nodes = self._DetermineNodes()
10714

    
10715
    if not owned_nodes.issuperset(need_nodes):
10716
      raise errors.OpPrereqError("Nodes in same group as '%s' changed since"
                                 " locks were acquired, current nodes"
                                 " are '%s', used to be '%s'; retry the"
10719
                                 " operation" %
10720
                                 (self.op.node_name,
10721
                                  utils.CommaJoin(need_nodes),
10722
                                  utils.CommaJoin(owned_nodes)),
10723
                                 errors.ECODE_STATE)
10724

    
10725
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10726
    if owned_groups != wanted_groups:
10727
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10728
                               " current groups are '%s', used to be '%s';"
10729
                               " retry the operation" %
10730
                               (utils.CommaJoin(wanted_groups),
10731
                                utils.CommaJoin(owned_groups)))
10732

    
10733
    # Determine affected instances
10734
    self.instances = self._DetermineInstances()
10735
    self.instance_names = [i.name for i in self.instances]
10736

    
10737
    if set(self.instance_names) != owned_instances:
10738
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10739
                               " were acquired, current instances are '%s',"
10740
                               " used to be '%s'; retry the operation" %
10741
                               (self.op.node_name,
10742
                                utils.CommaJoin(self.instance_names),
10743
                                utils.CommaJoin(owned_instances)))
10744

    
10745
    if self.instance_names:
10746
      self.LogInfo("Evacuating instances from node '%s': %s",
10747
                   self.op.node_name,
10748
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10749
    else:
10750
      self.LogInfo("No instances to evacuate from node '%s'",
10751
                   self.op.node_name)
10752

    
10753
    if self.op.remote_node is not None:
10754
      for i in self.instances:
10755
        if i.primary_node == self.op.remote_node:
10756
          raise errors.OpPrereqError("Node %s is the primary node of"
10757
                                     " instance %s, cannot use it as"
10758
                                     " secondary" %
10759
                                     (self.op.remote_node, i.name),
10760
                                     errors.ECODE_INVAL)
10761

    
10762
  def Exec(self, feedback_fn):
10763
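    # exactly one of iallocator or an explicit remote node must have been given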
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10764

    
10765
    if not self.instance_names:
10766
      # No instances to evacuate
10767
      jobs = []
10768

    
10769
    elif self.op.iallocator is not None:
10770
      # TODO: Implement relocation to other group
10771
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10772
                       evac_mode=self._MODE2IALLOCATOR[self.op.mode],
10773
                       instances=list(self.instance_names))
10774

    
10775
      ial.Run(self.op.iallocator)
10776

    
10777
      if not ial.success:
10778
        raise errors.OpPrereqError("Can't compute node evacuation using"
10779
                                   " iallocator '%s': %s" %
10780
                                   (self.op.iallocator, ial.info),
10781
                                   errors.ECODE_NORES)
10782

    
10783
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10784

    
10785
    elif self.op.remote_node is not None:
10786
      assert self.op.mode == constants.NODE_EVAC_SEC
10787
      jobs = [
10788
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10789
                                        remote_node=self.op.remote_node,
10790
                                        disks=[],
10791
                                        mode=constants.REPLACE_DISK_CHG,
10792
                                        early_release=self.op.early_release)]
10793
        for instance_name in self.instance_names
10794
        ]
10795

    
10796
    else:
10797
      raise errors.ProgrammerError("No iallocator or remote node")
10798

    
10799
    return ResultWithJobs(jobs)
10800

    
10801

    
10802
def _SetOpEarlyRelease(early_release, op):
10803
  """Sets C{early_release} flag on opcodes if available.
10804

10805
  """
10806
  try:
10807
    op.early_release = early_release
10808
  except AttributeError:
10809
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10810

    
10811
  return op
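
# Illustrative usage sketch for _SetOpEarlyRelease above (not part of the
# original module; op_data stands for one serialized opcode as returned by
# the iallocator):
#
#   op = opcodes.OpCode.LoadOpCode(op_data)
#   op = _SetOpEarlyRelease(True, op)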


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
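
# For example (illustrative values only):
#   _NodeEvacDest(True, "group1", ["node2", "node3"])  -> "node2, node3"
#   _NodeEvacDest(False, "group1", ["node2", "node3"]) -> "group1"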


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
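
# Rough sketch of the iallocator result unpacked above (names and the exact
# serialized form are illustrative assumptions, not taken from this module):
#
#   alloc_result = (
#     [("inst1.example.com", "group2", ["node3.example.com"])],  # moved
#     [],                                                        # failed
#     [[op1_data, op2_data]],                                    # jobs
#     )
#
# where each opX_data is a dict accepted by opcodes.OpCode.LoadOpCode().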


class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
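
    # Illustrative sketch (assumption, not part of the original code): a
    # client growing disk 0 of an instance by 1024 MiB would submit an
    # opcode roughly like
    #
    #   op = opcodes.OpInstanceGrowDisk(instance_name="inst1.example.com",
    #                                   disk=0, amount=1024,
    #                                   wait_for_sync=True)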


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)
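
    # Illustrative (made-up) return value for a DRBD device that is still
    # resyncing -- in order: device path, major/minor numbers, sync
    # percentage, estimated seconds left, degraded flag and local-disk
    # status:
    #   ("/dev/drbd0", 147, 0, 87.5, 130, True, constants.LDS_OKAY)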

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
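
    # Illustrative sketch (assumption): a query for live data on a single
    # instance would be submitted as
    #
    #   op = opcodes.OpInstanceQueryData(instances=["inst1.example.com"],
    #                                    static=False, use_locking=True)
    #
    # and the per-instance dict built above is then returned keyed by name.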


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.online_inst or self.op.offline_inst):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)
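
    # Illustrative sketch (assumption): a single "add one 10 GiB read-write
    # disk" modification, in the (op, dict) format validated above, would be
    # passed as
    #
    #   disks = [(constants.DDM_ADD,
    #             {constants.IDISK_SIZE: 10240,
    #              constants.IDISK_MODE: constants.DISK_RDWR})]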

    
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)
    pnode_info = self.cfg.GetNodeInfo(pnode)
    self.diskparams = self.cfg.GetNodeGroup(pnode_info.group).diskparams

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

        snode_info = self.cfg.GetNodeInfo(self.op.remote_node)
        if pnode_info.group != snode_info.group:
          self.LogWarning("The primary and secondary nodes are in two"
                          " different node groups; the disk parameters"
                          " from the first disk's node group will be"
                          " used")

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)
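
      # For illustration (assumption about the exact mask syntax):
      #   utils.ParseMultiCpuMask("0-1:3")  # -> [[0, 1], [3]]
      # gives one pinning entry per vCPU, so BE_VCPUS must be 2 for the
      # consistency check above to pass, and the nodes must expose at least
      # max CPU index + 1 = 4 physical CPUs for _CheckNodesPhysicalCPUs.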

    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         [instance.hypervisor])
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      else:
        (_, _, (pnhvinfo, )) = pninfo.payload
        if not isinstance(pnhvinfo.get("memory_free", None), int):
          self.warn.append("Node data from primary node %s doesn't contain"
                           " free memory information" % pnode)
        elif instance_info.fail_msg:
          self.warn.append("Can't get instance runtime information: %s" %
                          instance_info.fail_msg)
        else:
          if instance_info.payload:
            current_mem = int(instance_info.payload["memory"])
          else:
            # Assume instance not running
            # (there is a slight race condition here, but it's not very
            # probable, and we have no other way to check)
            # TODO: Describe race condition
            current_mem = 0
          #TODO(dynmem): do the appropriate check involving MINMEM
          miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                      pnhvinfo["memory_free"])
          if miss_mem > 0:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from starting, due to %d MB of memory"
                                       " missing on its primary node" %
                                       miss_mem,
                                       errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          (_, _, (nhvinfo, )) = nres.payload
          if not isinstance(nhvinfo.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nhvinfo["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
                            msg="cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    # disabling the instance
    if self.op.offline_inst:
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change instance state to offline")

    # enabling the instance
    if self.op.online_inst:
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
                          msg="cannot make instance go online")

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn,
                                      self.diskparams)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please cleanup manually")

    # Node resource locks will be released by caller

  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # this is a DRBD disk, return its port to the pool
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # Node resource locks will be released by caller

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base,
                                         feedback_fn,
                                         self.diskparams)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # online/offline instance
    if self.op.online_inst:
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))
    if self.op.offline_inst:
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }
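
  # Illustrative sketch (assumption): a template conversion handled by the
  # table above is requested with only disk_template (plus, for DRBD, the
  # new secondary node) set, e.g.:
  #
  #   op = opcodes.OpInstanceSetParams(instance_name="inst1.example.com",
  #                                    disk_template=constants.DT_DRBD8,
  #                                    remote_node="node2.example.com")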


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by instance optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by instances
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
12065
      }
12066

    
12067
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12068

    
12069
    return env
12070

    
12071
  def BuildHooksNodes(self):
12072
    """Build hooks nodes.
12073

12074
    """
12075
    mn = self.cfg.GetMasterNode()
12076
    return ([mn], [mn])
12077

    
12078
  def Exec(self, feedback_fn):
12079
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12080

    
12081
    assert instances == [self.op.instance_name], "Instance not locked"
12082

    
12083
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12084
                     instances=instances, target_groups=list(self.target_uuids))
12085

    
12086
    ial.Run(self.op.iallocator)
12087

    
12088
    if not ial.success:
12089
      raise errors.OpPrereqError("Can't compute solution for changing group of"
12090
                                 " instance '%s' using iallocator '%s': %s" %
12091
                                 (self.op.instance_name, self.op.iallocator,
12092
                                  ial.info),
12093
                                 errors.ECODE_NORES)
12094

    
12095
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12096

    
12097
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
12098
                 " instance '%s'", len(jobs), self.op.instance_name)
12099

    
12100
    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
12104
  """Query the exports list
12105

12106
  """
12107
  REQ_BGL = False
12108

    
12109
  def ExpandNames(self):
12110
    self.needed_locks = {}
12111
    self.share_locks[locking.LEVEL_NODE] = 1
12112
    if not self.op.nodes:
12113
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12114
    else:
12115
      self.needed_locks[locking.LEVEL_NODE] = \
12116
        _GetWantedNodes(self, self.op.nodes)
12117

    
12118
  def Exec(self, feedback_fn):
12119
    """Compute the list of all the exported system images.
12120

12121
    @rtype: dict
12122
    @return: a dictionary with the structure node->(export-list)
12123
        where export-list is a list of the instances exported on
12124
        that node.
12125

12126
    """
12127
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
12128
    rpcresult = self.rpc.call_export_list(self.nodes)
12129
    result = {}
12130
    for node in rpcresult:
12131
      if rpcresult[node].fail_msg:
12132
        result[node] = False
12133
      else:
12134
        result[node] = rpcresult[node].payload
12135

    
12136
    return result


class LUBackupPrepare(NoHooksLU):
12140
  """Prepares an instance for an export and returns useful information.
12141

12142
  """
12143
  REQ_BGL = False
12144

    
12145
  def ExpandNames(self):
12146
    self._ExpandAndLockInstance()
12147

    
12148
  def CheckPrereq(self):
12149
    """Check prerequisites.
12150

12151
    """
12152
    instance_name = self.op.instance_name
12153

    
12154
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12155
    assert self.instance is not None, \
12156
          "Cannot retrieve locked instance %s" % self.op.instance_name
12157
    _CheckNodeOnline(self, self.instance.primary_node)
12158

    
12159
    self._cds = _GetClusterDomainSecret()
12160

    
12161
  def Exec(self, feedback_fn):
12162
    """Prepares an instance for an export.
12163

12164
    """
12165
    instance = self.instance
12166

    
12167
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12168
      salt = utils.GenerateSecret(8)
12169

    
12170
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
12171
      result = self.rpc.call_x509_cert_create(instance.primary_node,
12172
                                              constants.RIE_CERT_VALIDITY)
12173
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
12174

    
12175
      (name, cert_pem) = result.payload
12176

    
12177
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12178
                                             cert_pem)
12179

    
12180
      return {
12181
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12182
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12183
                          salt),
12184
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12185
        }
12186

    
12187
    return None
12188

    
12189

    
12190
class LUBackupExport(LogicalUnit):
12191
  """Export an instance to an image in the cluster.
12192

12193
  """
12194
  HPATH = "instance-export"
12195
  HTYPE = constants.HTYPE_INSTANCE
12196
  REQ_BGL = False
12197

    
12198
  def CheckArguments(self):
12199
    """Check the arguments.
12200

12201
    """
12202
    self.x509_key_name = self.op.x509_key_name
12203
    self.dest_x509_ca_pem = self.op.destination_x509_ca
12204

    
12205
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12206
      if not self.x509_key_name:
12207
        raise errors.OpPrereqError("Missing X509 key name for encryption",
12208
                                   errors.ECODE_INVAL)
12209

    
12210
      if not self.dest_x509_ca_pem:
12211
        raise errors.OpPrereqError("Missing destination X509 CA",
12212
                                   errors.ECODE_INVAL)
12213

    
12214
  def ExpandNames(self):
12215
    self._ExpandAndLockInstance()
12216

    
12217
    # Lock all nodes for local exports
12218
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know
      # where the previous export might be, and in this LU we search for it
      # and remove it from its current node. In the future we could fix this
      # by:
      #  - making a tasklet to search (share-lock all), then create the new
      #    one, then one to remove it afterwards
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12228

    
12229
  def DeclareLocks(self, level):
12230
    """Last minute lock declaration."""
12231
    # All nodes are locked anyway, so nothing to do here.
12232

    
12233
  def BuildHooksEnv(self):
12234
    """Build hooks env.
12235

12236
    This will run on the master, primary node and target node.
12237

12238
    """
12239
    env = {
12240
      "EXPORT_MODE": self.op.mode,
12241
      "EXPORT_NODE": self.op.target_node,
12242
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12243
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12244
      # TODO: Generic function for boolean env variables
12245
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12246
      }
12247

    
12248
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12249

    
12250
    return env
12251

    
12252
  def BuildHooksNodes(self):
12253
    """Build hooks nodes.
12254

12255
    """
12256
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12257

    
12258
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12259
      nl.append(self.op.target_node)
12260

    
12261
    return (nl, nl)
12262

    
12263
  def CheckPrereq(self):
12264
    """Check prerequisites.
12265

12266
    This checks that the instance and node names are valid.
12267

12268
    """
12269
    instance_name = self.op.instance_name
12270

    
12271
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12272
    assert self.instance is not None, \
12273
          "Cannot retrieve locked instance %s" % self.op.instance_name
12274
    _CheckNodeOnline(self, self.instance.primary_node)
12275

    
12276
    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Cannot remove instance without shutting it"
                                 " down first", errors.ECODE_INVAL)
12281

    
12282
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12283
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12284
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12285
      assert self.dst_node is not None
12286

    
12287
      _CheckNodeOnline(self, self.dst_node.name)
12288
      _CheckNodeNotDrained(self, self.dst_node.name)
12289

    
12290
      self._cds = None
12291
      self.dest_disk_info = None
12292
      self.dest_x509_ca = None
12293

    
12294
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12295
      self.dst_node = None
12296

    
12297
      if len(self.op.target_node) != len(self.instance.disks):
12298
        raise errors.OpPrereqError(("Received destination information for %s"
12299
                                    " disks, but instance %s has %s disks") %
12300
                                   (len(self.op.target_node), instance_name,
12301
                                    len(self.instance.disks)),
12302
                                   errors.ECODE_INVAL)
12303

    
12304
      cds = _GetClusterDomainSecret()
12305

    
12306
      # Check X509 key name
12307
      try:
12308
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12309
      except (TypeError, ValueError), err:
12310
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12311

    
12312
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12313
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12314
                                   errors.ECODE_INVAL)
12315

    
12316
      # Load and verify CA
12317
      try:
12318
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12319
      except OpenSSL.crypto.Error, err:
12320
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12321
                                   (err, ), errors.ECODE_INVAL)
12322

    
12323
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12324
      if errcode is not None:
12325
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12326
                                   (msg, ), errors.ECODE_INVAL)
12327

    
12328
      self.dest_x509_ca = cert
12329

    
12330
      # Verify target information
12331
      disk_info = []
12332
      for idx, disk_data in enumerate(self.op.target_node):
12333
        try:
12334
          (host, port, magic) = \
12335
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12336
        except errors.GenericError, err:
12337
          raise errors.OpPrereqError("Target info for disk %s: %s" %
12338
                                     (idx, err), errors.ECODE_INVAL)
12339

    
12340
        disk_info.append((host, port, magic))
12341

    
12342
      assert len(disk_info) == len(self.op.target_node)
12343
      self.dest_disk_info = disk_info
12344

    
12345
    else:
12346
      raise errors.ProgrammerError("Unhandled export mode %r" %
12347
                                   self.op.mode)
12348

    
12349
    # instance disk type verification
12350
    # TODO: Implement export support for file-based disks
12351
    for disk in self.instance.disks:
12352
      if disk.dev_type == constants.LD_FILE:
12353
        raise errors.OpPrereqError("Export not supported for instances with"
12354
                                   " file-based disks", errors.ECODE_INVAL)
12355

    
12356
  def _CleanupExports(self, feedback_fn):
12357
    """Removes exports of current instance from all other nodes.
12358

12359
    If an instance in a cluster with nodes A..D was exported to node C, its
12360
    exports will be removed from the nodes A, B and D.
12361

12362
    """
12363
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12364

    
12365
    nodelist = self.cfg.GetNodeList()
12366
    nodelist.remove(self.dst_node.name)
12367

    
12368
    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpBackupQuery substitutes
    # an empty list with the full cluster node list.
12371
    iname = self.instance.name
12372
    if nodelist:
12373
      feedback_fn("Removing old exports for instance %s" % iname)
12374
      exportlist = self.rpc.call_export_list(nodelist)
12375
      for node in exportlist:
12376
        if exportlist[node].fail_msg:
12377
          continue
12378
        if iname in exportlist[node].payload:
12379
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12380
          if msg:
12381
            self.LogWarning("Could not remove older export for instance %s"
12382
                            " on node %s: %s", iname, node, msg)
12383

    
12384
  def Exec(self, feedback_fn):
12385
    """Export an instance to an image in the cluster.
12386

12387
    """
12388
    assert self.op.mode in constants.EXPORT_MODES
12389

    
12390
    instance = self.instance
12391
    src_node = instance.primary_node
12392

    
12393
    if self.op.shutdown:
12394
      # shutdown the instance, but not the disks
12395
      feedback_fn("Shutting down instance %s" % instance.name)
12396
      result = self.rpc.call_instance_shutdown(src_node, instance,
12397
                                               self.op.shutdown_timeout)
12398
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12399
      result.Raise("Could not shutdown instance %s on"
12400
                   " node %s" % (instance.name, src_node))
12401

    
12402
    # set the disks ID correctly since call_instance_start needs the
12403
    # correct drbd minor to create the symlinks
12404
    for disk in instance.disks:
12405
      self.cfg.SetDiskID(disk, src_node)
12406

    
12407
    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)
12413

    
12414
    try:
12415
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12416
                                                     instance)
12417

    
12418
      helper.CreateSnapshots()
12419
      try:
12420
        if (self.op.shutdown and
12421
            instance.admin_state == constants.ADMINST_UP and
12422
            not self.op.remove_instance):
12423
          assert not activate_disks
12424
          feedback_fn("Starting instance %s" % instance.name)
12425
          result = self.rpc.call_instance_start(src_node,
12426
                                                (instance, None, None), False)
12427
          msg = result.fail_msg
12428
          if msg:
12429
            feedback_fn("Failed to start instance: %s" % msg)
12430
            _ShutdownInstanceDisks(self, instance)
12431
            raise errors.OpExecError("Could not start instance: %s" % msg)
12432

    
12433
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12434
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12435
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12436
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12437
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12438

    
12439
          (key_name, _, _) = self.x509_key_name
12440

    
12441
          dest_ca_pem = \
12442
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12443
                                            self.dest_x509_ca)
12444

    
12445
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12446
                                                     key_name, dest_ca_pem,
12447
                                                     timeouts)
12448
      finally:
12449
        helper.Cleanup()
12450

    
12451
      # Check for backwards compatibility
12452
      assert len(dresults) == len(instance.disks)
12453
      assert compat.all(isinstance(i, bool) for i in dresults), \
12454
             "Not all results are boolean: %r" % dresults
12455

    
12456
    finally:
12457
      if activate_disks:
12458
        feedback_fn("Deactivating disks for %s" % instance.name)
12459
        _ShutdownInstanceDisks(self, instance)
12460

    
12461
    if not (compat.all(dresults) and fin_resu):
12462
      failures = []
12463
      if not fin_resu:
12464
        failures.append("export finalization")
12465
      if not compat.all(dresults):
12466
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12467
                               if not dsk)
12468
        failures.append("disk export: disk(s) %s" % fdsk)
12469

    
12470
      raise errors.OpExecError("Export failed, errors in %s" %
12471
                               utils.CommaJoin(failures))
12472

    
12473
    # At this point, the export was successful, we can cleanup/finish
12474

    
12475
    # Remove instance if requested
12476
    if self.op.remove_instance:
12477
      feedback_fn("Removing instance %s" % instance.name)
12478
      _RemoveInstance(self, feedback_fn, instance,
12479
                      self.op.ignore_remove_failures)
12480

    
12481
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12482
      self._CleanupExports(feedback_fn)
12483

    
12484
    return fin_resu, dresults
12485

    
12486

    
12487
class LUBackupRemove(NoHooksLU):
12488
  """Remove exports related to the named instance.
12489

12490
  """
12491
  REQ_BGL = False
12492

    
12493
  def ExpandNames(self):
12494
    self.needed_locks = {}
12495
    # We need all nodes to be locked in order for RemoveExport to work, but we
12496
    # don't need to lock the instance itself, as nothing will happen to it (and
12497
    # we can remove exports also for a removed instance)
12498
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12499

    
12500
  def Exec(self, feedback_fn):
12501
    """Remove any export.
12502

12503
    """
12504
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12505
    # If the instance was not found we'll try with the name that was passed in.
12506
    # This will only work if it was an FQDN, though.
12507
    fqdn_warn = False
12508
    if not instance_name:
12509
      fqdn_warn = True
12510
      instance_name = self.op.instance_name
12511

    
12512
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12513
    exportlist = self.rpc.call_export_list(locked_nodes)
12514
    found = False
12515
    for node in exportlist:
12516
      msg = exportlist[node].fail_msg
12517
      if msg:
12518
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12519
        continue
12520
      if instance_name in exportlist[node].payload:
12521
        found = True
12522
        result = self.rpc.call_export_remove(node, instance_name)
12523
        msg = result.fail_msg
12524
        if msg:
12525
          logging.error("Could not remove export for instance %s"
12526
                        " on node %s: %s", instance_name, node, msg)
12527

    
12528
    if fqdn_warn and not found:
12529
      feedback_fn("Export not found. If trying to remove an export belonging"
12530
                  " to a deleted instance please use its Fully Qualified"
12531
                  " Domain Name.")
12532

    
12533

    
12534
class LUGroupAdd(LogicalUnit):
12535
  """Logical unit for creating node groups.
12536

12537
  """
12538
  HPATH = "group-add"
12539
  HTYPE = constants.HTYPE_GROUP
12540
  REQ_BGL = False
12541

    
12542
  def ExpandNames(self):
12543
    # We need the new group's UUID here so that we can create and acquire the
12544
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12545
    # that it should not check whether the UUID exists in the configuration.
12546
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12547
    self.needed_locks = {}
12548
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12549

    
12550
  def CheckPrereq(self):
12551
    """Check prerequisites.
12552

12553
    This checks that the given group name does not already exist as a node
    group.
12555

12556
    """
12557
    try:
12558
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12559
    except errors.OpPrereqError:
12560
      pass
12561
    else:
12562
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12563
                                 " node group (UUID: %s)" %
12564
                                 (self.op.group_name, existing_uuid),
12565
                                 errors.ECODE_EXISTS)
12566

    
12567
    if self.op.ndparams:
12568
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12569

    
12570
    if self.op.diskparams:
12571
      for templ in constants.DISK_TEMPLATES:
12572
        if templ not in self.op.diskparams:
12573
          self.op.diskparams[templ] = {}
12574
        utils.ForceDictType(self.op.diskparams[templ], constants.DISK_DT_TYPES)
12575
    else:
12576
      self.op.diskparams = self.cfg.GetClusterInfo().diskparams
12577

    
12578
  def BuildHooksEnv(self):
12579
    """Build hooks env.
12580

12581
    """
12582
    return {
12583
      "GROUP_NAME": self.op.group_name,
12584
      }
12585

    
12586
  def BuildHooksNodes(self):
12587
    """Build hooks nodes.
12588

12589
    """
12590
    mn = self.cfg.GetMasterNode()
12591
    return ([mn], [mn])
12592

    
12593
  def Exec(self, feedback_fn):
12594
    """Add the node group to the cluster.
12595

12596
    """
12597
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12598
                                  uuid=self.group_uuid,
12599
                                  alloc_policy=self.op.alloc_policy,
12600
                                  ndparams=self.op.ndparams,
12601
                                  diskparams=self.op.diskparams)
12602

    
12603
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12604
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12605

    
12606

    
12607
class LUGroupAssignNodes(NoHooksLU):
12608
  """Logical unit for assigning nodes to groups.
12609

12610
  """
12611
  REQ_BGL = False
12612

    
12613
  def ExpandNames(self):
12614
    # These raise errors.OpPrereqError on their own:
12615
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12616
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12617

    
12618
    # We want to lock all the affected nodes and groups. We have readily
12619
    # available the list of nodes, and the *destination* group. To gather the
12620
    # list of "source" groups, we need to fetch node information later on.
12621
    self.needed_locks = {
12622
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12623
      locking.LEVEL_NODE: self.op.nodes,
12624
      }
12625

    
12626
  def DeclareLocks(self, level):
12627
    if level == locking.LEVEL_NODEGROUP:
12628
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12629

    
12630
      # Try to get all affected nodes' groups without having the group or node
12631
      # lock yet. Needs verification later in the code flow.
12632
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12633

    
12634
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12635

    
12636
  def CheckPrereq(self):
12637
    """Check prerequisites.
12638

12639
    """
12640
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12641
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12642
            frozenset(self.op.nodes))
12643

    
12644
    expected_locks = (set([self.group_uuid]) |
12645
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12646
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12647
    if actual_locks != expected_locks:
12648
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12649
                               " current groups are '%s', used to be '%s'" %
12650
                               (utils.CommaJoin(expected_locks),
12651
                                utils.CommaJoin(actual_locks)))
12652

    
12653
    self.node_data = self.cfg.GetAllNodesInfo()
12654
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12655
    instance_data = self.cfg.GetAllInstancesInfo()
12656

    
12657
    if self.group is None:
12658
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12659
                               (self.op.group_name, self.group_uuid))
12660

    
12661
    (new_splits, previous_splits) = \
12662
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12663
                                             for node in self.op.nodes],
12664
                                            self.node_data, instance_data)
12665

    
12666
    if new_splits:
12667
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12668

    
12669
      if not self.op.force:
12670
        raise errors.OpExecError("The following instances get split by this"
12671
                                 " change and --force was not given: %s" %
12672
                                 fmt_new_splits)
12673
      else:
12674
        self.LogWarning("This operation will split the following instances: %s",
12675
                        fmt_new_splits)
12676

    
12677
        if previous_splits:
12678
          self.LogWarning("In addition, these already-split instances continue"
12679
                          " to be split across groups: %s",
12680
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12681

    
12682
  def Exec(self, feedback_fn):
12683
    """Assign nodes to a new group.
12684

12685
    """
12686
    mods = [(node_name, self.group_uuid) for node_name in self.op.nodes]
12687

    
12688
    self.cfg.AssignGroupNodes(mods)
12689

    
12690
  @staticmethod
12691
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12692
    """Check for split instances after a node assignment.
12693

12694
    This method considers a series of node assignments as an atomic operation,
12695
    and returns information about split instances after applying the set of
12696
    changes.
12697

12698
    In particular, it returns information about newly split instances, and
12699
    instances that were already split, and remain so after the change.
12700

12701
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12702
    considered.
12703

12704
    @type changes: list of (node_name, new_group_uuid) pairs.
12705
    @param changes: list of node assignments to consider.
12706
    @param node_data: a dict with data for all nodes
12707
    @param instance_data: a dict with all instances to consider
12708
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and become split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.
12712

12713
    """
12714
    changed_nodes = dict((node, group) for node, group in changes
12715
                         if node_data[node].group != group)
12716

    
12717
    all_split_instances = set()
12718
    previously_split_instances = set()
12719

    
12720
    def InstanceNodes(instance):
12721
      return [instance.primary_node] + list(instance.secondary_nodes)
12722

    
12723
    for inst in instance_data.values():
12724
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12725
        continue
12726

    
12727
      instance_nodes = InstanceNodes(inst)
12728

    
12729
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12730
        previously_split_instances.add(inst.name)
12731

    
12732
      if len(set(changed_nodes.get(node, node_data[node].group)
12733
                 for node in instance_nodes)) > 1:
12734
        all_split_instances.add(inst.name)
12735

    
12736
    return (list(all_split_instances - previously_split_instances),
12737
            list(previously_split_instances & all_split_instances))
12738

    
12739

    
12740
class _GroupQuery(_QueryBase):
12741
  FIELDS = query.GROUP_FIELDS
12742

    
12743
  def ExpandNames(self, lu):
12744
    lu.needed_locks = {}
12745

    
12746
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12747
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12748

    
12749
    if not self.names:
12750
      self.wanted = [name_to_uuid[name]
12751
                     for name in utils.NiceSort(name_to_uuid.keys())]
12752
    else:
12753
      # Accept names to be either names or UUIDs.
12754
      missing = []
12755
      self.wanted = []
12756
      all_uuid = frozenset(self._all_groups.keys())
12757

    
12758
      for name in self.names:
12759
        if name in all_uuid:
12760
          self.wanted.append(name)
12761
        elif name in name_to_uuid:
12762
          self.wanted.append(name_to_uuid[name])
12763
        else:
12764
          missing.append(name)
12765

    
12766
      if missing:
12767
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12768
                                   utils.CommaJoin(missing),
12769
                                   errors.ECODE_NOENT)
12770

    
12771
  def DeclareLocks(self, lu, level):
12772
    pass
12773

    
12774
  def _GetQueryData(self, lu):
12775
    """Computes the list of node groups and their attributes.
12776

12777
    """
12778
    do_nodes = query.GQ_NODE in self.requested_data
12779
    do_instances = query.GQ_INST in self.requested_data
12780

    
12781
    group_to_nodes = None
12782
    group_to_instances = None
12783

    
12784
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12785
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12786
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12787
    # instance->node. Hence, we will need to process nodes even if we only need
12788
    # instance information.
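    # Illustrative result with hypothetical UUIDs, when GQ_INST was requested:
    #   group_to_instances == {"uuid-a": ["inst1.example.com"], "uuid-b": []}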
12789
    if do_nodes or do_instances:
12790
      all_nodes = lu.cfg.GetAllNodesInfo()
12791
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12792
      node_to_group = {}
12793

    
12794
      for node in all_nodes.values():
12795
        if node.group in group_to_nodes:
12796
          group_to_nodes[node.group].append(node.name)
12797
          node_to_group[node.name] = node.group
12798

    
12799
      if do_instances:
12800
        all_instances = lu.cfg.GetAllInstancesInfo()
12801
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12802

    
12803
        for instance in all_instances.values():
12804
          node = instance.primary_node
12805
          if node in node_to_group:
12806
            group_to_instances[node_to_group[node]].append(instance.name)
12807

    
12808
        if not do_nodes:
12809
          # Do not pass on node information if it was not requested.
12810
          group_to_nodes = None
12811

    
12812
    return query.GroupQueryData([self._all_groups[uuid]
12813
                                 for uuid in self.wanted],
12814
                                group_to_nodes, group_to_instances)
12815

    
12816

    
12817
class LUGroupQuery(NoHooksLU):
12818
  """Logical unit for querying node groups.
12819

12820
  """
12821
  REQ_BGL = False
12822

    
12823
  def CheckArguments(self):
12824
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12825
                          self.op.output_fields, False)
12826

    
12827
  def ExpandNames(self):
12828
    self.gq.ExpandNames(self)
12829

    
12830
  def DeclareLocks(self, level):
12831
    self.gq.DeclareLocks(self, level)
12832

    
12833
  def Exec(self, feedback_fn):
12834
    return self.gq.OldStyleQuery(self)
12835

    
12836

    
12837
class LUGroupSetParams(LogicalUnit):
12838
  """Modifies the parameters of a node group.
12839

12840
  """
12841
  HPATH = "group-modify"
12842
  HTYPE = constants.HTYPE_GROUP
12843
  REQ_BGL = False
12844

    
12845
  def CheckArguments(self):
12846
    all_changes = [
12847
      self.op.ndparams,
12848
      self.op.diskparams,
12849
      self.op.alloc_policy,
12850
      ]
12851

    
12852
    if all_changes.count(None) == len(all_changes):
12853
      raise errors.OpPrereqError("Please pass at least one modification",
12854
                                 errors.ECODE_INVAL)
12855

    
12856
  def ExpandNames(self):
12857
    # This raises errors.OpPrereqError on its own:
12858
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12859

    
12860
    self.needed_locks = {
12861
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12862
      }
12863

    
12864
  def CheckPrereq(self):
12865
    """Check prerequisites.
12866

12867
    """
12868
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12869

    
12870
    if self.group is None:
12871
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12872
                               (self.op.group_name, self.group_uuid))
12873

    
12874
    if self.op.ndparams:
12875
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12876
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12877
      self.new_ndparams = new_ndparams
12878

    
12879
    if self.op.diskparams:
12880
      self.new_diskparams = dict()
12881
      for templ in constants.DISK_TEMPLATES:
12882
        if templ not in self.op.diskparams:
12883
          self.op.diskparams[templ] = {}
12884
        new_templ_params = _GetUpdatedParams(self.group.diskparams[templ],
12885
                                             self.op.diskparams[templ])
12886
        utils.ForceDictType(new_templ_params, constants.DISK_DT_TYPES)
12887
        self.new_diskparams[templ] = new_templ_params
12888

    
12889
  def BuildHooksEnv(self):
12890
    """Build hooks env.
12891

12892
    """
12893
    return {
12894
      "GROUP_NAME": self.op.group_name,
12895
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12896
      }
12897

    
12898
  def BuildHooksNodes(self):
12899
    """Build hooks nodes.
12900

12901
    """
12902
    mn = self.cfg.GetMasterNode()
12903
    return ([mn], [mn])
12904

    
12905
  def Exec(self, feedback_fn):
12906
    """Modifies the node group.
12907

12908
    """
12909
    result = []
12910

    
12911
    if self.op.ndparams:
12912
      self.group.ndparams = self.new_ndparams
12913
      result.append(("ndparams", str(self.group.ndparams)))
12914

    
12915
    if self.op.diskparams:
12916
      self.group.diskparams = self.new_diskparams
12917
      result.append(("diskparams", str(self.group.diskparams)))
12918

    
12919
    if self.op.alloc_policy:
12920
      self.group.alloc_policy = self.op.alloc_policy
12921

    
12922
    self.cfg.Update(self.group, feedback_fn)
12923
    return result
12924

    
12925

    
12926
class LUGroupRemove(LogicalUnit):
12927
  HPATH = "group-remove"
12928
  HTYPE = constants.HTYPE_GROUP
12929
  REQ_BGL = False
12930

    
12931
  def ExpandNames(self):
12932
    # This will raises errors.OpPrereqError on its own:
12933
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12934
    self.needed_locks = {
12935
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12936
      }
12937

    
12938
  def CheckPrereq(self):
12939
    """Check prerequisites.
12940

12941
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12944

12945
    """
12946
    # Verify that the group is empty.
12947
    group_nodes = [node.name
12948
                   for node in self.cfg.GetAllNodesInfo().values()
12949
                   if node.group == self.group_uuid]
12950

    
12951
    if group_nodes:
12952
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12953
                                 " nodes: %s" %
12954
                                 (self.op.group_name,
12955
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12956
                                 errors.ECODE_STATE)
12957

    
12958
    # Verify the cluster would not be left group-less.
12959
    if len(self.cfg.GetNodeGroupList()) == 1:
12960
      raise errors.OpPrereqError("Group '%s' is the only group,"
12961
                                 " cannot be removed" %
12962
                                 self.op.group_name,
12963
                                 errors.ECODE_STATE)
12964

    
12965
  def BuildHooksEnv(self):
12966
    """Build hooks env.
12967

12968
    """
12969
    return {
12970
      "GROUP_NAME": self.op.group_name,
12971
      }
12972

    
12973
  def BuildHooksNodes(self):
12974
    """Build hooks nodes.
12975

12976
    """
12977
    mn = self.cfg.GetMasterNode()
12978
    return ([mn], [mn])
12979

    
12980
  def Exec(self, feedback_fn):
12981
    """Remove the node group.
12982

12983
    """
12984
    try:
12985
      self.cfg.RemoveNodeGroup(self.group_uuid)
12986
    except errors.ConfigurationError:
12987
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12988
                               (self.op.group_name, self.group_uuid))
12989

    
12990
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12991

    
12992

    
12993
class LUGroupRename(LogicalUnit):
12994
  HPATH = "group-rename"
12995
  HTYPE = constants.HTYPE_GROUP
12996
  REQ_BGL = False
12997

    
12998
  def ExpandNames(self):
12999
    # This raises errors.OpPrereqError on its own:
13000
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13001

    
13002
    self.needed_locks = {
13003
      locking.LEVEL_NODEGROUP: [self.group_uuid],
13004
      }
13005

    
13006
  def CheckPrereq(self):
13007
    """Check prerequisites.
13008

13009
    Ensures requested new name is not yet used.
13010

13011
    """
13012
    try:
13013
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
13014
    except errors.OpPrereqError:
13015
      pass
13016
    else:
13017
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
13018
                                 " node group (UUID: %s)" %
13019
                                 (self.op.new_name, new_name_uuid),
13020
                                 errors.ECODE_EXISTS)
13021

    
13022
  def BuildHooksEnv(self):
13023
    """Build hooks env.
13024

13025
    """
13026
    return {
13027
      "OLD_NAME": self.op.group_name,
13028
      "NEW_NAME": self.op.new_name,
13029
      }
13030

    
13031
  def BuildHooksNodes(self):
13032
    """Build hooks nodes.
13033

13034
    """
13035
    mn = self.cfg.GetMasterNode()
13036

    
13037
    all_nodes = self.cfg.GetAllNodesInfo()
13038
    all_nodes.pop(mn, None)
13039

    
13040
    run_nodes = [mn]
13041
    run_nodes.extend(node.name for node in all_nodes.values()
13042
                     if node.group == self.group_uuid)
13043

    
13044
    return (run_nodes, run_nodes)
13045

    
13046
  def Exec(self, feedback_fn):
13047
    """Rename the node group.
13048

13049
    """
13050
    group = self.cfg.GetNodeGroup(self.group_uuid)
13051

    
13052
    if group is None:
13053
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
13054
                               (self.op.group_name, self.group_uuid))
13055

    
13056
    group.name = self.op.new_name
13057
    self.cfg.Update(group, feedback_fn)
13058

    
13059
    return self.op.new_name
13060

    
13061

    
13062
class LUGroupEvacuate(LogicalUnit):
13063
  HPATH = "group-evacuate"
13064
  HTYPE = constants.HTYPE_GROUP
13065
  REQ_BGL = False
13066

    
13067
  def ExpandNames(self):
13068
    # This raises errors.OpPrereqError on its own:
13069
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
13070

    
13071
    if self.op.target_groups:
13072
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
13073
                                  self.op.target_groups)
13074
    else:
13075
      self.req_target_uuids = []
13076

    
13077
    if self.group_uuid in self.req_target_uuids:
13078
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
13079
                                 " as a target group (targets are %s)" %
13080
                                 (self.group_uuid,
13081
                                  utils.CommaJoin(self.req_target_uuids)),
13082
                                 errors.ECODE_INVAL)
13083

    
13084
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
13085

    
13086
    self.share_locks = _ShareAll()
13087
    self.needed_locks = {
13088
      locking.LEVEL_INSTANCE: [],
13089
      locking.LEVEL_NODEGROUP: [],
13090
      locking.LEVEL_NODE: [],
13091
      }
13092

    
13093
  def DeclareLocks(self, level):
13094
    if level == locking.LEVEL_INSTANCE:
13095
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
13096

    
13097
      # Lock instances optimistically, needs verification once node and group
13098
      # locks have been acquired
13099
      self.needed_locks[locking.LEVEL_INSTANCE] = \
13100
        self.cfg.GetNodeGroupInstances(self.group_uuid)
13101

    
13102
    elif level == locking.LEVEL_NODEGROUP:
13103
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
13104

    
13105
      if self.req_target_uuids:
13106
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
13107

    
13108
        # Lock all groups used by instances optimistically; this requires going
13109
        # via the node before it's locked, requiring verification later on
13110
        lock_groups.update(group_uuid
13111
                           for instance_name in
13112
                             self.owned_locks(locking.LEVEL_INSTANCE)
13113
                           for group_uuid in
13114
                             self.cfg.GetInstanceNodeGroups(instance_name))
13115
      else:
13116
        # No target groups, need to lock all of them
13117
        lock_groups = locking.ALL_SET
13118

    
13119
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
13120

    
13121
    elif level == locking.LEVEL_NODE:
13122
      # This will only lock the nodes in the group to be evacuated which
13123
      # contain actual instances
13124
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
13125
      self._LockInstancesNodes()
13126

    
13127
      # Lock all nodes in group to be evacuated and target groups
13128
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13129
      assert self.group_uuid in owned_groups
13130
      member_nodes = [node_name
13131
                      for group in owned_groups
13132
                      for node_name in self.cfg.GetNodeGroup(group).members]
13133
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
13134

    
13135
  def CheckPrereq(self):
13136
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
13137
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
13138
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
13139

    
13140
    assert owned_groups.issuperset(self.req_target_uuids)
13141
    assert self.group_uuid in owned_groups
13142

    
13143
    # Check if locked instances are still correct
13144
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
13145

    
13146
    # Get instance information
13147
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
13148

    
13149
    # Check if node groups for locked instances are still correct
13150
    for instance_name in owned_instances:
13151
      inst = self.instances[instance_name]
13152
      assert owned_nodes.issuperset(inst.all_nodes), \
13153
        "Instance %s's nodes changed while we kept the lock" % instance_name
13154

    
13155
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
13156
                                             owned_groups)
13157

    
13158
      assert self.group_uuid in inst_groups, \
13159
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
13160

    
13161
    if self.req_target_uuids:
13162
      # User requested specific target groups
13163
      self.target_uuids = self.req_target_uuids
13164
    else:
13165
      # All groups except the one to be evacuated are potential targets
13166
      self.target_uuids = [group_uuid for group_uuid in owned_groups
13167
                           if group_uuid != self.group_uuid]
13168

    
13169
      if not self.target_uuids:
13170
        raise errors.OpPrereqError("There are no possible target groups",
13171
                                   errors.ECODE_INVAL)
13172

    
13173
  def BuildHooksEnv(self):
13174
    """Build hooks env.
13175

13176
    """
13177
    return {
13178
      "GROUP_NAME": self.op.group_name,
13179
      "TARGET_GROUPS": " ".join(self.target_uuids),
13180
      }
13181

    
13182
  def BuildHooksNodes(self):
13183
    """Build hooks nodes.
13184

13185
    """
13186
    mn = self.cfg.GetMasterNode()
13187

    
13188
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
13189

    
13190
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
13191

    
13192
    return (run_nodes, run_nodes)
13193

    
13194
  def Exec(self, feedback_fn):
13195
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
13196

    
13197
    assert self.group_uuid not in self.target_uuids
13198

    
13199
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
13200
                     instances=instances, target_groups=self.target_uuids)
13201

    
13202
    ial.Run(self.op.iallocator)
13203

    
13204
    if not ial.success:
13205
      raise errors.OpPrereqError("Can't compute group evacuation using"
13206
                                 " iallocator '%s': %s" %
13207
                                 (self.op.iallocator, ial.info),
13208
                                 errors.ECODE_NORES)
13209

    
13210
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13211

    
13212
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13213
                 len(jobs), self.op.group_name)
13214

    
13215
    return ResultWithJobs(jobs)
13216

    
13217

    
13218
class TagsLU(NoHooksLU): # pylint: disable=W0223
13219
  """Generic tags LU.
13220

13221
  This is an abstract class which is the parent of all the other tags LUs.
13222

13223
  """
13224
  def ExpandNames(self):
13225
    self.group_uuid = None
13226
    self.needed_locks = {}
13227
    if self.op.kind == constants.TAG_NODE:
13228
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13229
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
13230
    elif self.op.kind == constants.TAG_INSTANCE:
13231
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13232
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13233
    elif self.op.kind == constants.TAG_NODEGROUP:
13234
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13235

    
13236
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13237
    # not possible to acquire the BGL based on opcode parameters)
13238

    
13239
  def CheckPrereq(self):
13240
    """Check prerequisites.
13241

13242
    """
13243
    if self.op.kind == constants.TAG_CLUSTER:
13244
      self.target = self.cfg.GetClusterInfo()
13245
    elif self.op.kind == constants.TAG_NODE:
13246
      self.target = self.cfg.GetNodeInfo(self.op.name)
13247
    elif self.op.kind == constants.TAG_INSTANCE:
13248
      self.target = self.cfg.GetInstanceInfo(self.op.name)
13249
    elif self.op.kind == constants.TAG_NODEGROUP:
13250
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
13251
    else:
13252
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13253
                                 str(self.op.kind), errors.ECODE_INVAL)
13254

    
13255

    
13256
class LUTagsGet(TagsLU):
13257
  """Returns the tags of a given object.
13258

13259
  """
13260
  REQ_BGL = False
13261

    
13262
  def ExpandNames(self):
13263
    TagsLU.ExpandNames(self)
13264

    
13265
    # Share locks as this is only a read operation
13266
    self.share_locks = _ShareAll()
13267

    
13268
  def Exec(self, feedback_fn):
13269
    """Returns the tag list.
13270

13271
    """
13272
    return list(self.target.GetTags())
13273

    
13274

    
13275
class LUTagsSearch(NoHooksLU):
13276
  """Searches the tags for a given pattern.
13277

13278
  """
13279
  REQ_BGL = False
13280

    
13281
  def ExpandNames(self):
13282
    self.needed_locks = {}
13283

    
13284
  def CheckPrereq(self):
13285
    """Check prerequisites.
13286

13287
    This checks the pattern passed for validity by compiling it.
13288

13289
    """
13290
    try:
13291
      self.re = re.compile(self.op.pattern)
13292
    except re.error, err:
13293
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13294
                                 (self.op.pattern, err), errors.ECODE_INVAL)
13295

    
13296
  def Exec(self, feedback_fn):
13297
    """Returns the tag list.
13298

13299
    """
13300
    cfg = self.cfg
13301
    tgts = [("/cluster", cfg.GetClusterInfo())]
13302
    ilist = cfg.GetAllInstancesInfo().values()
13303
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13304
    nlist = cfg.GetAllNodesInfo().values()
13305
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13306
    tgts.extend(("/nodegroup/%s" % n.name, n)
13307
                for n in cfg.GetAllNodeGroupsInfo().values())
13308
    results = []
13309
    for path, target in tgts:
13310
      for tag in target.GetTags():
13311
        if self.re.search(tag):
13312
          results.append((path, tag))
13313
    return results
13314

    
13315

    
13316
class LUTagsSet(TagsLU):
13317
  """Sets a tag on a given object.
13318

13319
  """
13320
  REQ_BGL = False
13321

    
13322
  def CheckPrereq(self):
13323
    """Check prerequisites.
13324

13325
    This checks the type and length of the tag name and value.
13326

13327
    """
13328
    TagsLU.CheckPrereq(self)
13329
    for tag in self.op.tags:
13330
      objects.TaggableObject.ValidateTag(tag)
13331

    
13332
  def Exec(self, feedback_fn):
13333
    """Sets the tag.
13334

13335
    """
13336
    try:
13337
      for tag in self.op.tags:
13338
        self.target.AddTag(tag)
13339
    except errors.TagError, err:
13340
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
13341
    self.cfg.Update(self.target, feedback_fn)
13342

    
13343

    
13344
class LUTagsDel(TagsLU):
13345
  """Delete a list of tags from a given object.
13346

13347
  """
13348
  REQ_BGL = False
13349

    
13350
  def CheckPrereq(self):
13351
    """Check prerequisites.
13352

13353
    This checks that we have the given tag.
13354

13355
    """
13356
    TagsLU.CheckPrereq(self)
13357
    for tag in self.op.tags:
13358
      objects.TaggableObject.ValidateTag(tag)
13359
    del_tags = frozenset(self.op.tags)
13360
    cur_tags = self.target.GetTags()
13361

    
13362
    diff_tags = del_tags - cur_tags
13363
    if diff_tags:
13364
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13365
      raise errors.OpPrereqError("Tag(s) %s not found" %
13366
                                 (utils.CommaJoin(diff_names), ),
13367
                                 errors.ECODE_NOENT)
13368

    
13369
  def Exec(self, feedback_fn):
13370
    """Remove the tag from the object.
13371

13372
    """
13373
    for tag in self.op.tags:
13374
      self.target.RemoveTag(tag)
13375
    self.cfg.Update(self.target, feedback_fn)
13376

    
13377

    
13378
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


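# Illustrative sketch (hypothetical values, for documentation only): an
# OpTestDelay opcode exercising the LU above carries the fields referenced
# in the code, e.g.
#
#   op = opcodes.OpTestDelay(duration=2.5, on_master=True,
#                            on_nodes=["node1.example.com"], repeat=3)
#
# which would sleep on the master and on node1 three times in a row.

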
class LUTestJqueue(NoHooksLU):
13427
  """Utility LU to test some aspects of the job queue.
13428

13429
  """
13430
  REQ_BGL = False
13431

    
13432
  # Must be lower than default timeout for WaitForJobChange to see whether it
13433
  # notices changed jobs
13434
  _CLIENT_CONNECT_TIMEOUT = 20.0
13435
  _CLIENT_CONFIRM_TIMEOUT = 60.0
13436

    
13437
  @classmethod
13438
  def _NotifyUsingSocket(cls, cb, errcls):
13439
    """Opens a Unix socket and waits for another program to connect.
13440

13441
    @type cb: callable
13442
    @param cb: Callback to send socket name to client
13443
    @type errcls: class
13444
    @param errcls: Exception class to use for errors
13445

13446
    """
13447
    # Using a temporary directory as there's no easy way to create temporary
13448
    # sockets without writing a custom loop around tempfile.mktemp and
13449
    # socket.bind
13450
    tmpdir = tempfile.mkdtemp()
13451
    try:
13452
      tmpsock = utils.PathJoin(tmpdir, "sock")
13453

    
13454
      logging.debug("Creating temporary socket at %s", tmpsock)
13455
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13456
      try:
13457
        sock.bind(tmpsock)
13458
        sock.listen(1)
13459

    
13460
        # Send details to client
13461
        cb(tmpsock)
13462

    
13463
        # Wait for client to connect before continuing
13464
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13465
        try:
13466
          (conn, _) = sock.accept()
13467
        except socket.error, err:
13468
          raise errcls("Client didn't connect in time (%s)" % err)
13469
      finally:
13470
        sock.close()
13471
    finally:
13472
      # Remove as soon as client is connected
13473
      shutil.rmtree(tmpdir)
13474

    
13475
    # Wait for client to close
13476
    try:
13477
      try:
13478
        # pylint: disable=E1101
13479
        # Instance of '_socketobject' has no ... member
13480
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13481
        conn.recv(1)
13482
      except socket.error, err:
13483
        raise errcls("Client failed to confirm notification (%s)" % err)
13484
    finally:
13485
      conn.close()
13486

    
13487
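  # Client-side sketch (for documentation only; the actual test client lives
  # outside this module): the peer that receives the socket path is expected
  # to connect and later send a single byte, mirroring the accept()/recv(1)
  # sequence in _NotifyUsingSocket above:
  #
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(sockname)  # unblocks sock.accept() on the LU side
  #   ...                     # perform whatever checks the test needs
  #   sock.send("x")          # unblocks conn.recv(1) on the LU side
  #   sock.close()
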
  def _SendNotification(self, test, arg, sockname):
13488
    """Sends a notification to the client.
13489

13490
    @type test: string
13491
    @param test: Test name
13492
    @param arg: Test argument (depends on test)
13493
    @type sockname: string
13494
    @param sockname: Socket path
13495

13496
    """
13497
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13498

    
13499
  def _Notify(self, prereq, test, arg):
13500
    """Notifies the client of a test.
13501

13502
    @type prereq: bool
13503
    @param prereq: Whether this is a prereq-phase test
13504
    @type test: string
13505
    @param test: Test name
13506
    @param arg: Test argument (depends on test)
13507

13508
    """
13509
    if prereq:
13510
      errcls = errors.OpPrereqError
13511
    else:
13512
      errcls = errors.OpExecError
13513

    
13514
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13515
                                                  test, arg),
13516
                                   errcls)
13517

    
13518
  def CheckArguments(self):
13519
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13520
    self.expandnames_calls = 0
13521

    
13522
  def ExpandNames(self):
13523
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13524
    if checkargs_calls < 1:
13525
      raise errors.ProgrammerError("CheckArguments was not called")
13526

    
13527
    self.expandnames_calls += 1
13528

    
13529
    if self.op.notify_waitlock:
13530
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13531

    
13532
    self.LogInfo("Expanding names")
13533

    
13534
    # Get lock on master node (just to get a lock, not for a particular reason)
13535
    self.needed_locks = {
13536
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13537
      }
13538

    
13539
  def Exec(self, feedback_fn):
13540
    if self.expandnames_calls < 1:
13541
      raise errors.ProgrammerError("ExpandNames was not called")
13542

    
13543
    if self.op.notify_exec:
13544
      self._Notify(False, constants.JQT_EXEC, None)
13545

    
13546
    self.LogInfo("Executing")
13547

    
13548
    if self.op.log_messages:
13549
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13550
      for idx, msg in enumerate(self.op.log_messages):
13551
        self.LogInfo("Sending log message %s", idx + 1)
13552
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13553
        # Report how many test messages have been sent
13554
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13555

    
13556
    if self.op.fail:
13557
      raise errors.OpExecError("Opcode failure was requested")
13558

    
13559
    return True
13560

    
13561

    
13562
class IAllocator(object):
13563
  """IAllocator framework.
13564

13565
  An IAllocator instance has four sets of attributes:
13566
    - cfg that is needed to query the cluster
13567
    - input data (all members of the _KEYS class attribute are required)
13568
    - four buffer attributes (in|out_data|text), that represent the
13569
      input (to the external script) in text and data structure format,
13570
      and the output from it, again in two formats
13571
    - the result variables from the script (success, info, result) for
13572
      easy usage
13573

13574
  """
13575
  # pylint: disable=R0902
13576
  # lots of instance attributes
13577

    
13578
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13579
    self.cfg = cfg
13580
    self.rpc = rpc_runner
13581
    # init buffer variables
13582
    self.in_text = self.out_text = self.in_data = self.out_data = None
13583
    # init all input fields so that pylint is happy
13584
    self.mode = mode
13585
    self.memory = self.disks = self.disk_template = None
13586
    self.os = self.tags = self.nics = self.vcpus = None
13587
    self.hypervisor = None
13588
    self.relocate_from = None
13589
    self.name = None
13590
    self.instances = None
13591
    self.evac_mode = None
13592
    self.target_groups = []
13593
    # computed fields
13594
    self.required_nodes = None
13595
    # init result fields
13596
    self.success = self.info = self.result = None
13597

    
13598
    try:
13599
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13600
    except KeyError:
13601
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13602
                                   " IAllocator" % self.mode)
13603

    
13604
    keyset = [n for (n, _) in keydata]
13605

    
13606
    for key in kwargs:
13607
      if key not in keyset:
13608
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13609
                                     " IAllocator" % key)
13610
      setattr(self, key, kwargs[key])
13611

    
13612
    for key in keyset:
13613
      if key not in kwargs:
13614
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13615
                                     " IAllocator" % key)
13616
    self._BuildInputData(compat.partial(fn, self), keydata)
13617

    
13618
  def _ComputeClusterData(self):
13619
    """Compute the generic allocator input data.
13620

13621
    This is the data that is independent of the actual operation.
13622

13623
    """
13624
    cfg = self.cfg
13625
    cluster_info = cfg.GetClusterInfo()
13626
    # cluster data
13627
    data = {
13628
      "version": constants.IALLOCATOR_VERSION,
13629
      "cluster_name": cfg.GetClusterName(),
13630
      "cluster_tags": list(cluster_info.GetTags()),
13631
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13632
      # we don't have job IDs
13633
      }
13634
    ninfo = cfg.GetAllNodesInfo()
13635
    iinfo = cfg.GetAllInstancesInfo().values()
13636
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13637

    
13638
    # node data
13639
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13640

    
13641
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13642
      hypervisor_name = self.hypervisor
13643
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13644
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13645
    else:
13646
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13647

    
13648
    node_data = self.rpc.call_node_info(node_list, [cfg.GetVGName()],
13649
                                        [hypervisor_name])
13650
    node_iinfo = \
13651
      self.rpc.call_all_instances_info(node_list,
13652
                                       cluster_info.enabled_hypervisors)
13653

    
13654
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13655

    
13656
    config_ndata = self._ComputeBasicNodeData(ninfo)
13657
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13658
                                                 i_list, config_ndata)
13659
    assert len(data["nodes"]) == len(ninfo), \
13660
        "Incomplete node data computed"
13661

    
13662
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13663

    
13664
    self.in_data = data
13665

    
13666
  @staticmethod
13667
  def _ComputeNodeGroupData(cfg):
13668
    """Compute node groups data.
13669

13670
    """
13671
    ng = dict((guuid, {
13672
      "name": gdata.name,
13673
      "alloc_policy": gdata.alloc_policy,
13674
      })
13675
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13676

    
13677
    return ng
13678

    
13679
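  # Illustrative shape of the structure built above (the UUID and group name
  # are made-up examples):
  #
  #   {
  #     "2b6e1e1a-0000-4444-8888-123456789abc": {
  #       "name": "default",
  #       "alloc_policy": constants.ALLOC_POLICY_PREFERRED,
  #     },
  #   }
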
  @staticmethod
13680
  def _ComputeBasicNodeData(node_cfg):
13681
    """Compute global node data.
13682

13683
    @rtype: dict
13684
    @return: a dict mapping node names to dicts of static node attributes
13685

13686
    """
13687
    # fill in static (config-based) values
13688
    node_results = dict((ninfo.name, {
13689
      "tags": list(ninfo.GetTags()),
13690
      "primary_ip": ninfo.primary_ip,
13691
      "secondary_ip": ninfo.secondary_ip,
13692
      "offline": ninfo.offline,
13693
      "drained": ninfo.drained,
13694
      "master_candidate": ninfo.master_candidate,
13695
      "group": ninfo.group,
13696
      "master_capable": ninfo.master_capable,
13697
      "vm_capable": ninfo.vm_capable,
13698
      })
13699
      for ninfo in node_cfg.values())
13700

    
13701
    return node_results
13702

    
13703
  @staticmethod
13704
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13705
                              node_results):
13706
    """Compute global node data.
13707

13708
    @param node_results: the basic node structures as filled from the config
13709

13710
    """
13711
    #TODO(dynmem): compute the right data on MAX and MIN memory
13712
    # make a copy of the current dict
13713
    node_results = dict(node_results)
13714
    for nname, nresult in node_data.items():
13715
      assert nname in node_results, "Missing basic data for node %s" % nname
13716
      ninfo = node_cfg[nname]
13717

    
13718
      if not (ninfo.offline or ninfo.drained):
13719
        nresult.Raise("Can't get data for node %s" % nname)
13720
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13721
                                nname)
13722
        remote_info = _MakeLegacyNodeInfo(nresult.payload)
13723

    
13724
        for attr in ["memory_total", "memory_free", "memory_dom0",
13725
                     "vg_size", "vg_free", "cpu_total"]:
13726
          if attr not in remote_info:
13727
            raise errors.OpExecError("Node '%s' didn't return attribute"
13728
                                     " '%s'" % (nname, attr))
13729
          if not isinstance(remote_info[attr], int):
13730
            raise errors.OpExecError("Node '%s' returned invalid value"
13731
                                     " for '%s': %s" %
13732
                                     (nname, attr, remote_info[attr]))
13733
        # compute memory used by primary instances
13734
        i_p_mem = i_p_up_mem = 0
13735
        for iinfo, beinfo in i_list:
13736
          if iinfo.primary_node == nname:
13737
            i_p_mem += beinfo[constants.BE_MAXMEM]
13738
            if iinfo.name not in node_iinfo[nname].payload:
13739
              i_used_mem = 0
13740
            else:
13741
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13742
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
13743
            remote_info["memory_free"] -= max(0, i_mem_diff)
13744

    
13745
            if iinfo.admin_state == constants.ADMINST_UP:
13746
              i_p_up_mem += beinfo[constants.BE_MAXMEM]
13747

    
13748
        # compute memory used by instances
13749
        pnr_dyn = {
13750
          "total_memory": remote_info["memory_total"],
13751
          "reserved_memory": remote_info["memory_dom0"],
13752
          "free_memory": remote_info["memory_free"],
13753
          "total_disk": remote_info["vg_size"],
13754
          "free_disk": remote_info["vg_free"],
13755
          "total_cpus": remote_info["cpu_total"],
13756
          "i_pri_memory": i_p_mem,
13757
          "i_pri_up_memory": i_p_up_mem,
13758
          }
13759
        pnr_dyn.update(node_results[nname])
13760
        node_results[nname] = pnr_dyn
13761

    
13762
    return node_results
13763

    
13764
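  # Illustrative example of one merged node entry as produced above for an
  # online, vm_capable node (all values are made-up examples):
  #
  #   "node1.example.com": {
  #     # static part from _ComputeBasicNodeData
  #     "tags": [], "primary_ip": "192.0.2.1", "secondary_ip": "198.51.100.1",
  #     "offline": False, "drained": False, "master_candidate": True,
  #     "group": "<node group UUID>", "master_capable": True,
  #     "vm_capable": True,
  #     # dynamic part added by _ComputeDynamicNodeData
  #     "total_memory": 32140, "reserved_memory": 1024, "free_memory": 20480,
  #     "total_disk": 409600, "free_disk": 102400, "total_cpus": 8,
  #     "i_pri_memory": 8192, "i_pri_up_memory": 6144,
  #   }
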
  @staticmethod
13765
  def _ComputeInstanceData(cluster_info, i_list):
13766
    """Compute global instance data.
13767

13768
    """
13769
    instance_data = {}
13770
    for iinfo, beinfo in i_list:
13771
      nic_data = []
13772
      for nic in iinfo.nics:
13773
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13774
        nic_dict = {
13775
          "mac": nic.mac,
13776
          "ip": nic.ip,
13777
          "mode": filled_params[constants.NIC_MODE],
13778
          "link": filled_params[constants.NIC_LINK],
13779
          }
13780
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13781
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13782
        nic_data.append(nic_dict)
13783
      pir = {
13784
        "tags": list(iinfo.GetTags()),
13785
        "admin_state": iinfo.admin_state,
13786
        "vcpus": beinfo[constants.BE_VCPUS],
13787
        "memory": beinfo[constants.BE_MAXMEM],
13788
        "os": iinfo.os,
13789
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13790
        "nics": nic_data,
13791
        "disks": [{constants.IDISK_SIZE: dsk.size,
13792
                   constants.IDISK_MODE: dsk.mode}
13793
                  for dsk in iinfo.disks],
13794
        "disk_template": iinfo.disk_template,
13795
        "hypervisor": iinfo.hypervisor,
13796
        }
13797
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13798
                                                 pir["disks"])
13799
      instance_data[iinfo.name] = pir
13800

    
13801
    return instance_data
13802

    
13803
  def _AddNewInstance(self):
13804
    """Add new instance data to allocator structure.
13805

13806
    This in combination with _ComputeClusterData will create the
13807
    correct structure needed as input for the allocator.
13808

13809
    The checks for the completeness of the opcode must have already been
13810
    done.
13811

13812
    """
13813
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13814

    
13815
    if self.disk_template in constants.DTS_INT_MIRROR:
13816
      self.required_nodes = 2
13817
    else:
13818
      self.required_nodes = 1
13819

    
13820
    request = {
13821
      "name": self.name,
13822
      "disk_template": self.disk_template,
13823
      "tags": self.tags,
13824
      "os": self.os,
13825
      "vcpus": self.vcpus,
13826
      "memory": self.memory,
13827
      "disks": self.disks,
13828
      "disk_space_total": disk_space,
13829
      "nics": self.nics,
13830
      "required_nodes": self.required_nodes,
13831
      "hypervisor": self.hypervisor,
13832
      }
13833

    
13834
    return request
13835

    
13836
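  # Illustrative "allocate" request as built above (all values are made-up
  # examples; the "type" key is added later by _BuildInputData):
  #
  #   {
  #     "name": "instance1.example.com",
  #     "disk_template": "drbd",
  #     "tags": [],
  #     "os": "debootstrap+default",
  #     "vcpus": 2,
  #     "memory": 2048,
  #     "disks": [{"size": 10240, "mode": "rw"}],
  #     "disk_space_total": 10368,
  #     "nics": [{"mac": "aa:00:00:35:6f:12", "ip": None, "bridge": None}],
  #     "required_nodes": 2,
  #     "hypervisor": "xen-pvm",
  #   }
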
  def _AddRelocateInstance(self):
13837
    """Add relocate instance data to allocator structure.
13838

13839
    This in combination with _ComputeClusterData will create the
13840
    correct structure needed as input for the allocator.
13841

13842
    The checks for the completeness of the opcode must have already been
13843
    done.
13844

13845
    """
13846
    instance = self.cfg.GetInstanceInfo(self.name)
13847
    if instance is None:
13848
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13849
                                   " IAllocator" % self.name)
13850

    
13851
    if instance.disk_template not in constants.DTS_MIRRORED:
13852
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13853
                                 errors.ECODE_INVAL)
13854

    
13855
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13856
        len(instance.secondary_nodes) != 1:
13857
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13858
                                 errors.ECODE_STATE)
13859

    
13860
    self.required_nodes = 1
13861
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13862
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13863

    
13864
    request = {
13865
      "name": self.name,
13866
      "disk_space_total": disk_space,
13867
      "required_nodes": self.required_nodes,
13868
      "relocate_from": self.relocate_from,
13869
      }
13870
    return request
13871

    
13872
  def _AddNodeEvacuate(self):
13873
    """Get data for node-evacuate requests.
13874

13875
    """
13876
    return {
13877
      "instances": self.instances,
13878
      "evac_mode": self.evac_mode,
13879
      }
13880

    
13881
  def _AddChangeGroup(self):
13882
    """Get data for node-evacuate requests.
13883

13884
    """
13885
    return {
13886
      "instances": self.instances,
13887
      "target_groups": self.target_groups,
13888
      }
13889

    
13890
  def _BuildInputData(self, fn, keydata):
13891
    """Build input data structures.
13892

13893
    """
13894
    self._ComputeClusterData()
13895

    
13896
    request = fn()
13897
    request["type"] = self.mode
13898
    for keyname, keytype in keydata:
13899
      if keyname not in request:
13900
        raise errors.ProgrammerError("Request parameter %s is missing" %
13901
                                     keyname)
13902
      val = request[keyname]
13903
      if not keytype(val):
13904
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13905
                                     " validation, value %s, expected"
13906
                                     " type %s" % (keyname, val, keytype))
13907
    self.in_data["request"] = request
13908

    
13909
    self.in_text = serializer.Dump(self.in_data)
13910

    
13911
  _STRING_LIST = ht.TListOf(ht.TString)
13912
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13913
     # pylint: disable=E1101
13914
     # Class '...' has no 'OP_ID' member
13915
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13916
                          opcodes.OpInstanceMigrate.OP_ID,
13917
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13918
     })))
13919

    
13920
  _NEVAC_MOVED = \
13921
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13922
                       ht.TItems([ht.TNonEmptyString,
13923
                                  ht.TNonEmptyString,
13924
                                  ht.TListOf(ht.TNonEmptyString),
13925
                                 ])))
13926
  _NEVAC_FAILED = \
13927
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13928
                       ht.TItems([ht.TNonEmptyString,
13929
                                  ht.TMaybeString,
13930
                                 ])))
13931
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13932
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13933

    
13934
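  # Illustrative reply accepted by _NEVAC_RESULT above (instance/node names
  # are made-up; the third element is a list of jobs, each job a list of
  # opcode dicts):
  #
  #   [
  #     [  # moved
  #       ["inst1.example.com", "<target group>", ["node3.example.com"]],
  #     ],
  #     [  # failed
  #       ["inst2.example.com", "disk template not suitable for evacuation"],
  #     ],
  #     [  # jobs
  #       [{"OP_ID": "OP_INSTANCE_MIGRATE",
  #         "instance_name": "inst1.example.com"}],
  #     ],
  #   ]
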
  _MODE_DATA = {
13935
    constants.IALLOCATOR_MODE_ALLOC:
13936
      (_AddNewInstance,
13937
       [
13938
        ("name", ht.TString),
13939
        ("memory", ht.TInt),
13940
        ("disks", ht.TListOf(ht.TDict)),
13941
        ("disk_template", ht.TString),
13942
        ("os", ht.TString),
13943
        ("tags", _STRING_LIST),
13944
        ("nics", ht.TListOf(ht.TDict)),
13945
        ("vcpus", ht.TInt),
13946
        ("hypervisor", ht.TString),
13947
        ], ht.TList),
13948
    constants.IALLOCATOR_MODE_RELOC:
13949
      (_AddRelocateInstance,
13950
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13951
       ht.TList),
13952
     constants.IALLOCATOR_MODE_NODE_EVAC:
13953
      (_AddNodeEvacuate, [
13954
        ("instances", _STRING_LIST),
13955
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13956
        ], _NEVAC_RESULT),
13957
     constants.IALLOCATOR_MODE_CHG_GROUP:
13958
      (_AddChangeGroup, [
13959
        ("instances", _STRING_LIST),
13960
        ("target_groups", _STRING_LIST),
13961
        ], _NEVAC_RESULT),
13962
    }
13963

    
13964
  def Run(self, name, validate=True, call_fn=None):
13965
    """Run an instance allocator and return the results.
13966

13967
    """
13968
    if call_fn is None:
13969
      call_fn = self.rpc.call_iallocator_runner
13970

    
13971
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13972
    result.Raise("Failure while running the iallocator script")
13973

    
13974
    self.out_text = result.payload
13975
    if validate:
13976
      self._ValidateResult()
13977

    
13978
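  # Illustrative reply from an iallocator script as parsed by _ValidateResult
  # below; the "result" payload depends on the mode, this is a made-up
  # "allocate" answer:
  #
  #   {
  #     "success": true,
  #     "info": "allocation successful",
  #     "result": ["node1.example.com", "node2.example.com"]
  #   }
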
  def _ValidateResult(self):
13979
    """Process the allocator results.
13980

13981
    This will process and if successful save the result in
13982
    self.out_data and the other parameters.
13983

13984
    """
13985
    try:
13986
      rdict = serializer.Load(self.out_text)
13987
    except Exception, err:
13988
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13989

    
13990
    if not isinstance(rdict, dict):
13991
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
13992

    
13993
    # TODO: remove backwards compatibility in later versions
13994
    if "nodes" in rdict and "result" not in rdict:
13995
      rdict["result"] = rdict["nodes"]
13996
      del rdict["nodes"]
13997

    
13998
    for key in "success", "info", "result":
13999
      if key not in rdict:
14000
        raise errors.OpExecError("Can't parse iallocator results:"
14001
                                 " missing key '%s'" % key)
14002
      setattr(self, key, rdict[key])
14003

    
14004
    if not self._result_check(self.result):
14005
      raise errors.OpExecError("Iallocator returned invalid result,"
14006
                               " expected %s, got %s" %
14007
                               (self._result_check, self.result),
14008
                               errors.ECODE_INVAL)
14009

    
14010
    if self.mode == constants.IALLOCATOR_MODE_RELOC:
14011
      assert self.relocate_from is not None
14012
      assert self.required_nodes == 1
14013

    
14014
      node2group = dict((name, ndata["group"])
14015
                        for (name, ndata) in self.in_data["nodes"].items())
14016

    
14017
      fn = compat.partial(self._NodesToGroups, node2group,
14018
                          self.in_data["nodegroups"])
14019

    
14020
      instance = self.cfg.GetInstanceInfo(self.name)
14021
      request_groups = fn(self.relocate_from + [instance.primary_node])
14022
      result_groups = fn(rdict["result"] + [instance.primary_node])
14023

    
14024
      if self.success and not set(result_groups).issubset(request_groups):
14025
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
14026
                                 " differ from original groups (%s)" %
14027
                                 (utils.CommaJoin(result_groups),
14028
                                  utils.CommaJoin(request_groups)))
14029

    
14030
    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14031
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
14032

    
14033
    self.out_data = rdict
14034

    
14035
  @staticmethod
14036
  def _NodesToGroups(node2group, groups, nodes):
14037
    """Returns a list of unique group names for a list of nodes.
14038

14039
    @type node2group: dict
14040
    @param node2group: Map from node name to group UUID
14041
    @type groups: dict
14042
    @param groups: Group information
14043
    @type nodes: list
14044
    @param nodes: Node names
14045

14046
    """
14047
    result = set()
14048

    
14049
    for node in nodes:
14050
      try:
14051
        group_uuid = node2group[node]
14052
      except KeyError:
14053
        # Ignore unknown node
14054
        pass
14055
      else:
14056
        try:
14057
          group = groups[group_uuid]
14058
        except KeyError:
14059
          # Can't find group, let's use UUID
14060
          group_name = group_uuid
14061
        else:
14062
          group_name = group["name"]
14063

    
14064
        result.add(group_name)
14065

    
14066
    return sorted(result)
14067

    
14068

    
14069
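# Illustrative skeleton of the JSON document handed to an iallocator script
# (serialized from IAllocator.in_data above; nested keys elided, values are
# made-up examples):
#
#   {
#     "version": 2,
#     "cluster_name": "cluster.example.com",
#     "cluster_tags": [],
#     "enabled_hypervisors": ["xen-pvm"],
#     "nodegroups": {...},
#     "nodes": {...},
#     "instances": {...},
#     "request": {"type": "allocate", ...}
#   }

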
class LUTestAllocator(NoHooksLU):
14070
  """Run allocator tests.
14071

14072
  This LU runs the allocator tests.
14073

14074
  """
14075
  def CheckPrereq(self):
14076
    """Check prerequisites.
14077

14078
    This checks the opcode parameters depending on the direction and mode.
14079

14080
    """
14081
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14082
      for attr in ["memory", "disks", "disk_template",
14083
                   "os", "tags", "nics", "vcpus"]:
14084
        if not hasattr(self.op, attr):
14085
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
14086
                                     attr, errors.ECODE_INVAL)
14087
      iname = self.cfg.ExpandInstanceName(self.op.name)
14088
      if iname is not None:
14089
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
14090
                                   iname, errors.ECODE_EXISTS)
14091
      if not isinstance(self.op.nics, list):
14092
        raise errors.OpPrereqError("Invalid parameter 'nics'",
14093
                                   errors.ECODE_INVAL)
14094
      if not isinstance(self.op.disks, list):
14095
        raise errors.OpPrereqError("Invalid parameter 'disks'",
14096
                                   errors.ECODE_INVAL)
14097
      for row in self.op.disks:
14098
        if (not isinstance(row, dict) or
14099
            constants.IDISK_SIZE not in row or
14100
            not isinstance(row[constants.IDISK_SIZE], int) or
14101
            constants.IDISK_MODE not in row or
14102
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
14103
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
14104
                                     " parameter", errors.ECODE_INVAL)
14105
      if self.op.hypervisor is None:
14106
        self.op.hypervisor = self.cfg.GetHypervisorType()
14107
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14108
      fname = _ExpandInstanceName(self.cfg, self.op.name)
14109
      self.op.name = fname
14110
      self.relocate_from = \
14111
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
14112
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
14113
                          constants.IALLOCATOR_MODE_NODE_EVAC):
14114
      if not self.op.instances:
14115
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
14116
      self.op.instances = _GetWantedInstances(self, self.op.instances)
14117
    else:
14118
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
14119
                                 self.op.mode, errors.ECODE_INVAL)
14120

    
14121
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
14122
      if self.op.allocator is None:
14123
        raise errors.OpPrereqError("Missing allocator name",
14124
                                   errors.ECODE_INVAL)
14125
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
14126
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
14127
                                 self.op.direction, errors.ECODE_INVAL)
14128

    
14129
  def Exec(self, feedback_fn):
14130
    """Run the allocator test.
14131

14132
    """
14133
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
14134
      ial = IAllocator(self.cfg, self.rpc,
14135
                       mode=self.op.mode,
14136
                       name=self.op.name,
14137
                       memory=self.op.memory,
14138
                       disks=self.op.disks,
14139
                       disk_template=self.op.disk_template,
14140
                       os=self.op.os,
14141
                       tags=self.op.tags,
14142
                       nics=self.op.nics,
14143
                       vcpus=self.op.vcpus,
14144
                       hypervisor=self.op.hypervisor,
14145
                       )
14146
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
14147
      ial = IAllocator(self.cfg, self.rpc,
14148
                       mode=self.op.mode,
14149
                       name=self.op.name,
14150
                       relocate_from=list(self.relocate_from),
14151
                       )
14152
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
14153
      ial = IAllocator(self.cfg, self.rpc,
14154
                       mode=self.op.mode,
14155
                       instances=self.op.instances,
14156
                       target_groups=self.op.target_groups)
14157
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
14158
      ial = IAllocator(self.cfg, self.rpc,
14159
                       mode=self.op.mode,
14160
                       instances=self.op.instances,
14161
                       evac_mode=self.op.evac_mode)
14162
    else:
14163
      raise errors.ProgrammerError("Uncatched mode %s in"
14164
                                   " LUTestAllocator.Exec", self.op.mode)
14165

    
14166
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
14167
      result = ial.in_text
14168
    else:
14169
      ial.Run(self.op.allocator, validate=False)
14170
      result = ial.out_text
14171
    return result
14172

    
14173

    
14174
#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
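# Illustrative usage (assumed call pattern, for documentation only): query
# opcodes look up their implementation class by resource type, e.g.
#
#   cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#   _GetQueryImplementation("no-such-resource")        # raises OpPrereqError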